felipe Sun, 03 Oct 2010 16:01:38 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=303963
Log: - Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8) # In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII # characters, even in UTF-8 mode. However, this can be changed by setting # the PCRE_UCP option. Bug: http://bugs.php.net/52971 (Re-Opened) PCRE-Meta-Characters not working with utf-8 Changed paths: U php/php-src/branches/PHP_5_3/NEWS U php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c A php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt U php/php-src/trunk/ext/pcre/php_pcre.c Modified: php/php-src/branches/PHP_5_3/NEWS =================================================================== --- php/php-src/branches/PHP_5_3/NEWS 2010-10-03 15:48:23 UTC (rev 303962) +++ php/php-src/branches/PHP_5_3/NEWS 2010-10-03 16:01:38 UTC (rev 303963) @@ -22,6 +22,7 @@ - Fixed possible crash in mssql_fetch_batch(). (Kalle) - Fixed inconsistent backlog default value (-1) in FPM on many systems. (fat) +- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8). (Felipe) - Fixed bug #52947 (segfault when ssl stream option capture_peer_cert_chain used). (Felipe) - Fixed bug #52944 (Invalid write on second and subsequent reads with an Modified: php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c 2010-10-03 15:48:23 UTC (rev 303962) +++ php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c 2010-10-03 16:01:38 UTC (rev 303963) @@ -350,7 +350,14 @@ case 'S': do_study = 1; break; case 'U': coptions |= PCRE_UNGREEDY; break; case 'X': coptions |= PCRE_EXTRA; break; - case 'u': coptions |= PCRE_UTF8; break; + case 'u': coptions |= PCRE_UTF8; + /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII + characters, even in UTF-8 mode. However, this can be changed by setting + the PCRE_UCP option. */ +#ifdef PCRE_UCP + coptions |= PCRE_UCP; +#endif + break; /* Custom preg options */ case 'e': poptions |= PREG_REPLACE_EVAL; break; Added: php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt (rev 0) +++ php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt 2010-10-03 16:01:38 UTC (rev 303963) @@ -0,0 +1,43 @@ +--TEST-- +Bug #52971 (PCRE-Meta-Characters not working with utf-8) +--SKIPIF-- +<?php if ((double)PCRE_VERSION < 8.1) die('skip PCRE_VERSION >= 8.1 is required!'); ?> +--FILE-- +<?php + +$message = 'Der ist ein Süßwasserpool Süsswasserpool ... verschiedene Wassersportmöglichkeiten bei ...'; + +$pattern = '/\bwasser/iu'; +preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE); +var_dump($match); + +$pattern = '/[^\w]wasser/iu'; +preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE); +var_dump($match); + +?> +--EXPECTF-- +array(1) { + [0]=> + array(1) { + [0]=> + array(2) { + [0]=> + string(6) "Wasser" + [1]=> + int(61) + } + } +} +array(1) { + [0]=> + array(1) { + [0]=> + array(2) { + [0]=> + string(7) " Wasser" + [1]=> + int(60) + } + } +} Property changes on: php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt ___________________________________________________________________ Added: svn:keywords + Id Rev Revision Added: svn:eol-style + native Modified: php/php-src/trunk/ext/pcre/php_pcre.c =================================================================== --- php/php-src/trunk/ext/pcre/php_pcre.c 2010-10-03 15:48:23 UTC (rev 303962) +++ php/php-src/trunk/ext/pcre/php_pcre.c 2010-10-03 16:01:38 UTC (rev 303963) @@ -350,7 +350,14 @@ case 'S': do_study = 1; break; case 'U': coptions |= PCRE_UNGREEDY; break; case 'X': coptions |= PCRE_EXTRA; break; - case 'u': coptions |= PCRE_UTF8; break; + case 'u': coptions |= PCRE_UTF8; + /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII + characters, even in UTF-8 mode. However, this can be changed by setting + the PCRE_UCP option. */ +#ifdef PCRE_UCP + coptions |= PCRE_UCP; +#endif + break; /* Custom preg options */ case 'e': poptions |= PREG_REPLACE_EVAL; break;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php