moriyoshi               Sat Jan 31 17:36:34 2004 EDT

  Modified files:              
    /php-src    NEWS 
    /php-src/ext/pcre   php_pcre.c php_pcre.h 
  Log:
  - Fix bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into octets).
  
  
http://cvs.php.net/diff.php/php-src/NEWS?r1=1.1592&r2=1.1593&ty=u
Index: php-src/NEWS
diff -u php-src/NEWS:1.1592 php-src/NEWS:1.1593
--- php-src/NEWS:1.1592 Fri Jan 30 04:21:05 2004
+++ php-src/NEWS        Sat Jan 31 17:36:32 2004
@@ -14,6 +14,8 @@
   (Derick)
 - Fixed problems with longlong values in mysqli. (Georg)
 - Fixed class name case preserving of user defined classes. (Marcus)
+- Fixed bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into
+  octets). (Moriyoshi)
 - Fixed bug #27042 (SPL: SeekableIterator seek() broken). (Marcus)
 - Fixed bug #27008 (Every class method can be called as static). (Marcus)
 - Fixed bug #26938 (exec() has problems reading long lines).
http://cvs.php.net/diff.php/php-src/ext/pcre/php_pcre.c?r1=1.153&r2=1.154&ty=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.153 php-src/ext/pcre/php_pcre.c:1.154
--- php-src/ext/pcre/php_pcre.c:1.153   Sat Jan 31 16:56:42 2004
+++ php-src/ext/pcre/php_pcre.c Sat Jan 31 17:36:33 2004
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_pcre.c,v 1.153 2004/01/31 21:56:42 moriyoshi Exp $ */
+/* $Id: php_pcre.c,v 1.154 2004/01/31 22:36:33 moriyoshi Exp $ */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -132,7 +132,17 @@
 
 /* {{{ pcre_get_compiled_regex
  */
-PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int 
*preg_options TSRMLS_DC) {
+PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
+{
+       int compile_options; /* required by the _ex variant; this legacy wrapper discards it */
+       return pcre_get_compiled_regex_ex(regex, extra, preg_options, &compile_options TSRMLS_CC); /* TSRMLS_CC pairs with the callee's TSRMLS_DC */
+}
+/* }}} */
+
+/* {{{ pcre_get_compiled_regex_ex
+ */
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int 
*preg_options, int *compile_options TSRMLS_DC)
+{
        pcre                            *re = NULL;
        int                                      coptions = 0;
        int                                      soptions = 0;
@@ -162,6 +172,7 @@
 #endif
                        *extra = pce->extra;
                        *preg_options = pce->preg_options;
+                       *compile_options = pce->compile_options;
                        return pce->re;
 #if HAVE_SETLOCALE
                }
@@ -236,7 +247,7 @@
 
        /* Clear out preg options */
        *preg_options = 0;
-       
+
        /* Parse through the options, setting appropriate flags.  Display
           a warning if we encounter an unknown modifier. */    
        while (*pp != 0) {
@@ -297,13 +308,15 @@
        }
 
        *preg_options = poptions;
-       
+       *compile_options = coptions;
+
        efree(pattern);
 
        /* Store the compiled pattern and extra info in the cache. */
        new_entry.re = re;
        new_entry.extra = *extra;
        new_entry.preg_options = poptions;
+       new_entry.compile_options = coptions;
 #if HAVE_SETLOCALE
        new_entry.locale = pestrdup(locale, 1);
        new_entry.tables = tables;
@@ -1168,11 +1181,14 @@
                                   **limit,                             /* Number of 
pieces to return */
                                   **flags;
        pcre                    *re = NULL;                     /* Compiled regular 
expression */
+       pcre                    *re_bump = NULL;        /* Regex instance for empty 
matches */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
+       pcre_extra              *extra_bump = NULL;     /* Almost dummy */
        int                             *offsets;                       /* Array of 
subpattern offsets */
        int                              size_offsets;          /* Size of the offsets 
array */
        int                              exoptions = 0;         /* Execution options */
        int                              preg_options = 0;      /* Custom preg options 
*/
+       int                      coptions = 0;          /* PCRE compile options (e.g. PCRE_UTF8) */
        int                              argc;                          /* Argument 
count */
        int                              limit_val = -1;        /* Integer value of 
limit */
        int                              no_empty = 0;          /* If NO_EMPTY flag is 
set */
@@ -1210,7 +1226,7 @@
        convert_to_string_ex(subject);
        
        /* Compile regex or get it from cache. */
-       if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options 
TSRMLS_CC)) == NULL) {
+       if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra, 
&preg_options, &coptions TSRMLS_CC)) == NULL) {
                RETURN_FALSE;
        }
        
@@ -1284,8 +1300,26 @@
                           the start offset, and continue. Fudge the offset values
                           to achieve this, unless we're already at the end of the 
string. */
                        if (g_notempty != 0 && start_offset < Z_STRLEN_PP(subject)) {
-                               offsets[0] = start_offset;
-                               offsets[1] = start_offset + 1;
+                               if (coptions & PCRE_UTF8) {
+                                       if (re_bump == NULL) {
+                                               int dummy;
+
+                                               if ((re_bump = 
pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
+                                                       RETURN_FALSE;
+                                               }
+                                       }
+                                       count = pcre_exec(re_bump, extra_bump, 
Z_STRVAL_PP(subject),
+                                                         Z_STRLEN_PP(subject), 
start_offset,
+                                                         exoptions, offsets, 
size_offsets);
+                                       if (count < 1) {
+                                               php_error_docref(NULL 
TSRMLS_CC,E_NOTICE, "Unknown error");
+                                               offsets[0] = start_offset;
+                                               offsets[1] = start_offset + 1;
+                                       }
+                               } else {
+                                       offsets[0] = start_offset;
+                                       offsets[1] = start_offset + 1;
+                               }
                        } else
                                break;
                }
http://cvs.php.net/diff.php/php-src/ext/pcre/php_pcre.h?r1=1.37&r2=1.38&ty=u
Index: php-src/ext/pcre/php_pcre.h
diff -u php-src/ext/pcre/php_pcre.h:1.37 php-src/ext/pcre/php_pcre.h:1.38
--- php-src/ext/pcre/php_pcre.h:1.37    Thu Jan  8 12:32:39 2004
+++ php-src/ext/pcre/php_pcre.h Sat Jan 31 17:36:33 2004
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
  
-/* $Id: php_pcre.h,v 1.37 2004/01/08 17:32:39 sniper Exp $ */
+/* $Id: php_pcre.h,v 1.38 2004/01/31 22:36:33 moriyoshi Exp $ */
 
 #ifndef PHP_PCRE_H
 #define PHP_PCRE_H
@@ -43,6 +43,7 @@
 
 PHPAPI char *php_pcre_replace(char *regex,   int regex_len, char *subject, int 
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit 
TSRMLS_DC);
 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options 
TSRMLS_DC);
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int 
*preg_options, int *coptions TSRMLS_DC);
 
 extern zend_module_entry pcre_module_entry;
 #define pcre_module_ptr &pcre_module_entry
@@ -51,6 +52,7 @@
        pcre *re;
        pcre_extra *extra;
        int preg_options;
+       int compile_options;
 #if HAVE_SETLOCALE
        char *locale;
        unsigned const char *tables;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to