- added Smarty::$_UTF8_MODIFIER for proper PCRE charset handling (Forum Topic 20452)

This commit is contained in:
rodneyrehm
2011-12-18 18:48:07 +00:00
14 changed files with 42 additions and 22 deletions

View File

@@ -5,6 +5,7 @@
- changed unloadFilter() to not return a boolean in favor of chaining and API conformity - changed unloadFilter() to not return a boolean in favor of chaining and API conformity
- bugfix unregisterObject() raised notice when object to unregister did not exist - bugfix unregisterObject() raised notice when object to unregister did not exist
- changed internals to use Smarty::$_MBSTRING ($_CHARSET, $_DATE_FORMAT) for better unit testing - changed internals to use Smarty::$_MBSTRING ($_CHARSET, $_DATE_FORMAT) for better unit testing
- added Smarty::$_UTF8_MODIFIER for proper PCRE charset handling (Forum Topic 20452)
17.12.2011 17.12.2011
- improvement of compiling speed by new handling of plain text blocks in the lexer/parser (issue 68) - improvement of compiling speed by new handling of plain text blocks in the lexer/parser (issue 68)

View File

@@ -61,9 +61,15 @@ if (!defined('SMARTY_MBSTRING')) {
} }
if (!defined('SMARTY_RESOURCE_CHAR_SET')) { if (!defined('SMARTY_RESOURCE_CHAR_SET')) {
// UTF-8 can only be done properly when mbstring is available! // UTF-8 can only be done properly when mbstring is available!
/**
* @deprecated in favor of Smarty::$_CHARSET
*/
define('SMARTY_RESOURCE_CHAR_SET', SMARTY_MBSTRING ? 'UTF-8' : 'ISO-8859-1'); define('SMARTY_RESOURCE_CHAR_SET', SMARTY_MBSTRING ? 'UTF-8' : 'ISO-8859-1');
} }
if (!defined('SMARTY_RESOURCE_DATE_FORMAT')) { if (!defined('SMARTY_RESOURCE_DATE_FORMAT')) {
/**
* @deprecated in favor of Smarty::$_DATE_FORMAT
*/
define('SMARTY_RESOURCE_DATE_FORMAT', '%b %e, %Y'); define('SMARTY_RESOURCE_DATE_FORMAT', '%b %e, %Y');
} }
@@ -166,12 +172,25 @@ class Smarty extends Smarty_Internal_TemplateBase {
* contains directories outside of SMARTY_DIR that are to be muted by muteExpectedErrors() * contains directories outside of SMARTY_DIR that are to be muted by muteExpectedErrors()
*/ */
public static $_muted_directories = array(); public static $_muted_directories = array();
/**
* Flag denoting if Multibyte String functions are available
*/
public static $_MBSTRING = SMARTY_MBSTRING; public static $_MBSTRING = SMARTY_MBSTRING;
/**
* The character set to adhere to (e.g. "UTF-8")
*/
public static $_CHARSET = SMARTY_RESOURCE_CHAR_SET; public static $_CHARSET = SMARTY_RESOURCE_CHAR_SET;
/**
* The date format to be used internally
* (accepts date() and strftime())
*/
public static $_DATE_FORMAT = SMARTY_RESOURCE_DATE_FORMAT; public static $_DATE_FORMAT = SMARTY_RESOURCE_DATE_FORMAT;
/**
* Flag denoting if PCRE should run in UTF-8 mode
*/
public static $_UTF8_MODIFIER = 'u'; public static $_UTF8_MODIFIER = 'u';
/**#@+ /**#@+
* variables * variables
*/ */
@@ -1437,7 +1456,7 @@ class Smarty extends Smarty_Internal_TemplateBase {
} }
} }
// let PREG treat strings as ISO-8859-1 if we're not dealing with UTF-8 // let PCRE (preg_*) treat strings as ISO-8859-1 if we're not dealing with UTF-8
if (Smarty::$_CHARSET !== 'UTF-8') { if (Smarty::$_CHARSET !== 'UTF-8') {
Smarty::$_UTF8_MODIFIER = ''; Smarty::$_UTF8_MODIFIER = '';
} }

View File

@@ -84,7 +84,7 @@ function smarty_block_textformat($params, $content, $template, &$repeat)
continue; continue;
} }
// convert mult. spaces & special chars to single space // convert mult. spaces & special chars to single space
$_paragraph = preg_replace(array('!\s+!u', '!(^\s+)|(\s+$)!u'), array(' ', ''), $_paragraph); $_paragraph = preg_replace(array('!\s+!' . Smarty::$_UTF8_MODIFIER, '!(^\s+)|(\s+$)!' . Smarty::$_UTF8_MODIFIER), array(' ', ''), $_paragraph);
// indent first line // indent first line
if ($indent_first > 0) { if ($indent_first > 0) {
$_paragraph = str_repeat($indent_char, $indent_first) . $_paragraph; $_paragraph = str_repeat($indent_char, $indent_first) . $_paragraph;

View File

@@ -177,7 +177,7 @@ function smarty_function_html_checkboxes_output($name, $value, $output, $selecte
if ($labels) { if ($labels) {
if ($label_ids) { if ($label_ids) {
$_id = smarty_function_escape_special_chars(preg_replace('![^\w\-\.]!u', '_', $name . '_' . $value)); $_id = smarty_function_escape_special_chars(preg_replace('![^\w\-\.]!' . Smarty::$_UTF8_MODIFIER, '_', $name . '_' . $value));
$_output .= '<label for="' . $_id . '">'; $_output .= '<label for="' . $_id . '">';
} else { } else {
$_output .= '<label>'; $_output .= '<label>';

View File

@@ -165,7 +165,7 @@ function smarty_function_html_radios_output($name, $value, $output, $selected, $
if ($labels) { if ($labels) {
if ($label_ids) { if ($label_ids) {
$_id = smarty_function_escape_special_chars(preg_replace('![^\w\-\.]!u', '_', $name . '_' . $value)); $_id = smarty_function_escape_special_chars(preg_replace('![^\w\-\.]!' . Smarty::$_UTF8_MODIFIER, '_', $name . '_' . $value));
$_output .= '<label for="' . $_id . '">'; $_output .= '<label for="' . $_id . '">';
} else { } else {
$_output .= '<label>'; $_output .= '<label>';

View File

@@ -130,7 +130,7 @@ function smarty_function_mailto($params, $template)
} }
$address_encode = ''; $address_encode = '';
for ($x = 0, $_length = strlen($address); $x < $_length; $x++) { for ($x = 0, $_length = strlen($address); $x < $_length; $x++) {
if (preg_match('!\w!u', $address[$x])) { if (preg_match('!\w!' . Smarty::$_UTF8_MODIFIER, $address[$x])) {
$address_encode .= '%' . bin2hex($address[$x]); $address_encode .= '%' . bin2hex($address[$x]);
} else { } else {
$address_encode .= $address[$x]; $address_encode .= $address[$x];

View File

@@ -30,17 +30,17 @@ function smarty_modifier_capitalize($string, $uc_digits = false, $lc_rest = fals
$upper_string = mb_convert_case( $string, MB_CASE_TITLE, Smarty::$_CHARSET ); $upper_string = mb_convert_case( $string, MB_CASE_TITLE, Smarty::$_CHARSET );
} else { } else {
// uppercase word breaks // uppercase word breaks
$upper_string = preg_replace("!(^|[^\p{L}'])([\p{Ll}])!ueS", "stripslashes('\\1').mb_convert_case(stripslashes('\\2'),MB_CASE_UPPER, '" . addslashes(Smarty::$_CHARSET) . "')", $string); $upper_string = preg_replace("!(^|[^\p{L}'])([\p{Ll}])!eS" . Smarty::$_UTF8_MODIFIER, "stripslashes('\\1').mb_convert_case(stripslashes('\\2'),MB_CASE_UPPER, '" . addslashes(Smarty::$_CHARSET) . "')", $string);
} }
// check uc_digits case // check uc_digits case
if (!$uc_digits) { if (!$uc_digits) {
if (preg_match_all("!\b([\p{L}]*[\p{N}]+[\p{L}]*)\b!u", $string, $matches, PREG_OFFSET_CAPTURE)) { if (preg_match_all("!\b([\p{L}]*[\p{N}]+[\p{L}]*)\b!" . Smarty::$_UTF8_MODIFIER, $string, $matches, PREG_OFFSET_CAPTURE)) {
foreach($matches[1] as $match) { foreach($matches[1] as $match) {
$upper_string = substr_replace($upper_string, mb_strtolower($match[0], Smarty::$_CHARSET), $match[1], strlen($match[0])); $upper_string = substr_replace($upper_string, mb_strtolower($match[0], Smarty::$_CHARSET), $match[1], strlen($match[0]));
} }
} }
} }
$upper_string = preg_replace("!((^|\s)['\"])(\w)!ue", "stripslashes('\\1').mb_convert_case(stripslashes('\\3'),MB_CASE_UPPER, '" . addslashes(Smarty::$_CHARSET) . "')", $upper_string); $upper_string = preg_replace("!((^|\s)['\"])(\w)!e" . Smarty::$_UTF8_MODIFIER, "stripslashes('\\1').mb_convert_case(stripslashes('\\3'),MB_CASE_UPPER, '" . addslashes(Smarty::$_CHARSET) . "')", $upper_string);
return $upper_string; return $upper_string;
} }
@@ -49,16 +49,16 @@ function smarty_modifier_capitalize($string, $uc_digits = false, $lc_rest = fals
$string = strtolower($string); $string = strtolower($string);
} }
// uppercase (including hyphenated words) // uppercase (including hyphenated words)
$upper_string = preg_replace("!(^|[^\p{L}'])([\p{Ll}])!ueS", "stripslashes('\\1').ucfirst(stripslashes('\\2'))", $string); $upper_string = preg_replace("!(^|[^\p{L}'])([\p{Ll}])!eS" . Smarty::$_UTF8_MODIFIER, "stripslashes('\\1').ucfirst(stripslashes('\\2'))", $string);
// check uc_digits case // check uc_digits case
if (!$uc_digits) { if (!$uc_digits) {
if (preg_match_all("!\b([\p{L}]*[\p{N}]+[\p{L}]*)\b!u", $string, $matches, PREG_OFFSET_CAPTURE)) { if (preg_match_all("!\b([\p{L}]*[\p{N}]+[\p{L}]*)\b!" . Smarty::$_UTF8_MODIFIER, $string, $matches, PREG_OFFSET_CAPTURE)) {
foreach($matches[1] as $match) { foreach($matches[1] as $match) {
$upper_string = substr_replace($upper_string, strtolower($match[0]), $match[1], strlen($match[0])); $upper_string = substr_replace($upper_string, strtolower($match[0]), $match[1], strlen($match[0]));
} }
} }
} }
$upper_string = preg_replace("!((^|\s)['\"])(\w)!ue", "stripslashes('\\1').strtoupper(stripslashes('\\3'))", $upper_string); $upper_string = preg_replace("!((^|\s)['\"])(\w)!e" . Smarty::$_UTF8_MODIFIER, "stripslashes('\\1').strtoupper(stripslashes('\\3'))", $upper_string);
return $upper_string; return $upper_string;
} }

View File

@@ -21,7 +21,7 @@
function smarty_modifier_spacify($string, $spacify_char = ' ') function smarty_modifier_spacify($string, $spacify_char = ' ')
{ {
// well… what about charsets besides latin and UTF-8? // well… what about charsets besides latin and UTF-8?
return implode($spacify_char, preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY)); return implode($spacify_char, preg_split('//' . Smarty::$_UTF8_MODIFIER, $string, -1, PREG_SPLIT_NO_EMPTY));
} }
?> ?>

View File

@@ -32,7 +32,7 @@ function smarty_modifier_truncate($string, $length = 80, $etc = '...', $break_wo
if (mb_strlen($string, Smarty::$_CHARSET) > $length) { if (mb_strlen($string, Smarty::$_CHARSET) > $length) {
$length -= min($length, mb_strlen($etc, Smarty::$_CHARSET)); $length -= min($length, mb_strlen($etc, Smarty::$_CHARSET));
if (!$break_words && !$middle) { if (!$break_words && !$middle) {
$string = preg_replace('/\s+?(\S+)?$/u', '', mb_substr($string, 0, $length + 1, Smarty::$_CHARSET)); $string = preg_replace('/\s+?(\S+)?$/' . Smarty::$_UTF8_MODIFIER, '', mb_substr($string, 0, $length + 1, Smarty::$_CHARSET));
} }
if (!$middle) { if (!$middle) {
return mb_substr($string, 0, $length, Smarty::$_CHARSET) . $etc; return mb_substr($string, 0, $length, Smarty::$_CHARSET) . $etc;

View File

@@ -21,7 +21,7 @@
function smarty_modifiercompiler_count_characters($params, $compiler) function smarty_modifiercompiler_count_characters($params, $compiler)
{ {
if (!isset($params[1]) || $params[1] != 'true') { if (!isset($params[1]) || $params[1] != 'true') {
return 'preg_match_all(\'/[^\s]/u\',' . $params[0] . ', $tmp)'; return 'preg_match_all(\'/[^\s]/' . Smarty::$_UTF8_MODIFIER . '\',' . $params[0] . ', $tmp)';
} }
if (Smarty::$_MBSTRING) { if (Smarty::$_MBSTRING) {
return 'mb_strlen(' . $params[0] . ', \'' . addslashes(Smarty::$_CHARSET) . '\')'; return 'mb_strlen(' . $params[0] . ', \'' . addslashes(Smarty::$_CHARSET) . '\')';

View File

@@ -22,7 +22,7 @@
function smarty_modifiercompiler_count_sentences($params, $compiler) function smarty_modifiercompiler_count_sentences($params, $compiler)
{ {
// find periods, question marks, exclamation marks with a word before but not after. // find periods, question marks, exclamation marks with a word before but not after.
return 'preg_match_all("#\w[\.\?\!](\W|$)#uS", ' . $params[0] . ', $tmp)'; return 'preg_match_all("#\w[\.\?\!](\W|$)#S' . Smarty::$_UTF8_MODIFIER . '", ' . $params[0] . ', $tmp)';
} }
?> ?>

View File

@@ -21,9 +21,9 @@
function smarty_modifiercompiler_count_words($params, $compiler) function smarty_modifiercompiler_count_words($params, $compiler)
{ {
if (Smarty::$_MBSTRING) { if (Smarty::$_MBSTRING) {
// return 'preg_match_all(\'#[\w\pL]+#u\', ' . $params[0] . ', $tmp)'; // return 'preg_match_all(\'#[\w\pL]+#' . Smarty::$_UTF8_MODIFIER . '\', ' . $params[0] . ', $tmp)';
// expression taken from http://de.php.net/manual/en/function.str-word-count.php#85592 // expression taken from http://de.php.net/manual/en/function.str-word-count.php#85592
return 'preg_match_all(\'/\p{L}[\p{L}\p{Mn}\p{Pd}\\\'\x{2019}]*/u\', ' . $params[0] . ', $tmp)'; return 'preg_match_all(\'/\p{L}[\p{L}\p{Mn}\p{Pd}\\\'\x{2019}]*/' . Smarty::$_UTF8_MODIFIER . '\', ' . $params[0] . ', $tmp)';
} }
// no MBString fallback // no MBString fallback
return 'str_word_count(' . $params[0] . ')'; return 'str_word_count(' . $params[0] . ')';

View File

@@ -27,7 +27,7 @@ function smarty_modifiercompiler_strip($params, $compiler)
if (!isset($params[1])) { if (!isset($params[1])) {
$params[1] = "' '"; $params[1] = "' '";
} }
return "preg_replace('!\s+!u', {$params[1]},{$params[0]})"; return "preg_replace('!\s+!" . Smarty::$_UTF8_MODIFIER . "', {$params[1]},{$params[0]})";
} }
?> ?>

View File

@@ -22,7 +22,7 @@ if(!function_exists('smarty_mb_wordwrap')) {
function smarty_mb_wordwrap($str, $width=75, $break="\n", $cut=false) function smarty_mb_wordwrap($str, $width=75, $break="\n", $cut=false)
{ {
// break words into tokens using white space as a delimiter // break words into tokens using white space as a delimiter
$tokens = preg_split('!(\s)!uS', $str, -1, PREG_SPLIT_NO_EMPTY + PREG_SPLIT_DELIM_CAPTURE); $tokens = preg_split('!(\s)!S' . Smarty::$_UTF8_MODIFIER, $str, -1, PREG_SPLIT_NO_EMPTY + PREG_SPLIT_DELIM_CAPTURE);
$length = 0; $length = 0;
$t = ''; $t = '';
$_previous = false; $_previous = false;
@@ -37,14 +37,14 @@ if(!function_exists('smarty_mb_wordwrap')) {
$length = 0; $length = 0;
if ($cut) { if ($cut) {
$_tokens = preg_split('!(.{' . $width . '})!uS', $_token, -1, PREG_SPLIT_NO_EMPTY + PREG_SPLIT_DELIM_CAPTURE); $_tokens = preg_split('!(.{' . $width . '})!S' . Smarty::$_UTF8_MODIFIER, $_token, -1, PREG_SPLIT_NO_EMPTY + PREG_SPLIT_DELIM_CAPTURE);
// broken words go on a new line // broken words go on a new line
$t .= $break; $t .= $break;
} }
} }
foreach ($_tokens as $token) { foreach ($_tokens as $token) {
$_space = !!preg_match('!^\s$!uS', $token); $_space = !!preg_match('!^\s$!S' . Smarty::$_UTF8_MODIFIER, $token);
$token_length = mb_strlen($token, Smarty::$_CHARSET); $token_length = mb_strlen($token, Smarty::$_CHARSET);
$length += $token_length; $length += $token_length;