felipe                                   Sun, 03 Oct 2010 16:01:38 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=303963

Log:
- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8)
#   In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
#   characters, even in UTF-8 mode. However, this can be changed by setting
#   the PCRE_UCP option.

Bug: http://bugs.php.net/52971 (Re-Opened) PCRE-Meta-Characters not working with utf-8
      
Changed paths:
    U   php/php-src/branches/PHP_5_3/NEWS
    U   php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c
    A   php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt
    U   php/php-src/trunk/ext/pcre/php_pcre.c

Modified: php/php-src/branches/PHP_5_3/NEWS
===================================================================
--- php/php-src/branches/PHP_5_3/NEWS   2010-10-03 15:48:23 UTC (rev 303962)
+++ php/php-src/branches/PHP_5_3/NEWS   2010-10-03 16:01:38 UTC (rev 303963)
@@ -22,6 +22,7 @@
 - Fixed possible crash in mssql_fetch_batch(). (Kalle)
 - Fixed inconsistent backlog default value (-1) in FPM on many systems. (fat)

+- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8). (Felipe)
 - Fixed bug #52947 (segfault when ssl stream option capture_peer_cert_chain
   used). (Felipe)
 - Fixed bug #52944 (Invalid write on second and subsequent reads with an

Modified: php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c    2010-10-03 15:48:23 UTC (rev 303962)
+++ php/php-src/branches/PHP_5_3/ext/pcre/php_pcre.c    2010-10-03 16:01:38 UTC (rev 303963)
@@ -350,7 +350,14 @@
                        case 'S':       do_study  = 1;                                  break;
                        case 'U':       coptions |= PCRE_UNGREEDY;              break;
                        case 'X':       coptions |= PCRE_EXTRA;                 break;
-                       case 'u':       coptions |= PCRE_UTF8;                  break;
+                       case 'u':       coptions |= PCRE_UTF8;
+       /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
+       characters, even in UTF-8 mode. However, this can be changed by setting
+       the PCRE_UCP option. */
+#ifdef PCRE_UCP
+                                               coptions |= PCRE_UCP;
+#endif
+                               break;

                        /* Custom preg options */
                        case 'e':       poptions |= PREG_REPLACE_EVAL;  break;

Added: php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt                           (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt   2010-10-03 16:01:38 UTC (rev 303963)
@@ -0,0 +1,43 @@
+--TEST--
+Bug #52971 (PCRE-Meta-Characters not working with utf-8)
+--SKIPIF--
+<?php if ((double)PCRE_VERSION < 8.1) die('skip PCRE_VERSION >= 8.1 is required!'); ?>
+--FILE--
+<?php
+
+$message = 'Der ist ein Süßwasserpool Süsswasserpool ... verschiedene Wassersportmöglichkeiten bei ...';
+
+$pattern = '/\bwasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+$pattern = '/[^\w]wasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+?>
+--EXPECTF--
+array(1) {
+  [0]=>
+  array(1) {
+    [0]=>
+    array(2) {
+      [0]=>
+      string(6) "Wasser"
+      [1]=>
+      int(61)
+    }
+  }
+}
+array(1) {
+  [0]=>
+  array(1) {
+    [0]=>
+    array(2) {
+      [0]=>
+      string(7) " Wasser"
+      [1]=>
+      int(60)
+    }
+  }
+}


Property changes on: php/php-src/branches/PHP_5_3/ext/pcre/tests/bug52971.phpt
___________________________________________________________________
Added: svn:keywords
   + Id Rev Revision
Added: svn:eol-style
   + native

Modified: php/php-src/trunk/ext/pcre/php_pcre.c
===================================================================
--- php/php-src/trunk/ext/pcre/php_pcre.c       2010-10-03 15:48:23 UTC (rev 303962)
+++ php/php-src/trunk/ext/pcre/php_pcre.c       2010-10-03 16:01:38 UTC (rev 303963)
@@ -350,7 +350,14 @@
                        case 'S':       do_study  = 1;                                  break;
                        case 'U':       coptions |= PCRE_UNGREEDY;              break;
                        case 'X':       coptions |= PCRE_EXTRA;                 break;
-                       case 'u':       coptions |= PCRE_UTF8;                  break;
+                       case 'u':       coptions |= PCRE_UTF8;
+       /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
+       characters, even in UTF-8 mode. However, this can be changed by setting
+       the PCRE_UCP option. */
+#ifdef PCRE_UCP
+                                               coptions |= PCRE_UCP;
+#endif
+                               break;

                        /* Custom preg options */
                        case 'e':       poptions |= PREG_REPLACE_EVAL;  break;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to