nlopess         Sat Mar  8 11:58:12 2008 UTC

  Modified files:              (Branch: PHP_5_3)
    /php-src/ext/pcre   php_pcre.c 
  Log:
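  Implement #44336: optimize UTF-8 string matching by setting
  PCRE_NO_UTF8_CHECK once pcre_exec() has run on the subject, so that
  later pcre_exec() calls on the same subject skip UTF-8 re-validation.
  Add the PREG_BAD_UTF8_OFFSET_ERROR constant.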
  
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.168.2.9.2.21.2.12&r2=1.168.2.9.2.21.2.13&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12 php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.13
--- php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12     Mon Mar  3 11:11:43 2008
+++ php-src/ext/pcre/php_pcre.c Sat Mar  8 11:58:12 2008
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.12 2008/03/03 11:11:43 dmitry Exp $ */
+/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.13 2008/03/08 11:58:12 nlopess Exp $ */
 
 #include "php.h"
 #include "php_ini.h"
@@ -48,7 +48,8 @@
        PHP_PCRE_INTERNAL_ERROR,
        PHP_PCRE_BACKTRACK_LIMIT_ERROR,
        PHP_PCRE_RECURSION_LIMIT_ERROR,
-       PHP_PCRE_BAD_UTF8_ERROR
+       PHP_PCRE_BAD_UTF8_ERROR,
+       PHP_PCRE_BAD_UTF8_OFFSET_ERROR
 };
 
 
@@ -72,6 +73,10 @@
                        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
                        break;
 
+               case PCRE_ERROR_BADUTF8_OFFSET:
+                       preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
+                       break;
+
                default:
                        preg_code = PHP_PCRE_INTERNAL_ERROR;
                        break;
@@ -145,6 +150,7 @@
        REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", 
PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", 
PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, 
CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", 
PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
        REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), 
CONST_CS | CONST_PERSISTENT);
 
        return SUCCESS;
@@ -614,7 +620,10 @@
                count = pcre_exec(pce->re, extra, subject, subject_len, 
start_offset,
                                                  exoptions|g_notempty, 
offsets, size_offsets);
 
-               /* Check for too many substrings condition. */  
+               /* the string was already proved to be valid UTF-8 */
+               exoptions |= PCRE_NO_UTF8_CHECK;
+
+               /* Check for too many substrings condition. */
                if (count == 0) {
                        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, 
but too many substrings");
                        count = size_offsets/3;
@@ -1034,7 +1043,10 @@
                /* Execute the regular expression. */
                count = pcre_exec(pce->re, extra, subject, subject_len, 
start_offset,
                                                  exoptions|g_notempty, 
offsets, size_offsets);
-               
+
+               /* the string was already proved to be valid UTF-8 */
+               exoptions |= PCRE_NO_UTF8_CHECK;
+
                /* Check for too many substrings condition. */
                if (count == 0) {
                        php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but 
too many substrings");
@@ -1472,6 +1484,9 @@
                                                  subject_len, start_offset,
                                                  exoptions|g_notempty, 
offsets, size_offsets);
 
+               /* the string was already proved to be valid UTF-8 */
+               exoptions |= PCRE_NO_UTF8_CHECK;
+
                /* Check for too many substrings condition. */
                if (count == 0) {
                        php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but 
too many substrings");
@@ -1535,9 +1550,8 @@
                                                          subject_len, 
start_offset,
                                                          exoptions, offsets, 
size_offsets);
                                        if (count < 1) {
-                                               php_error_docref(NULL 
TSRMLS_CC,E_NOTICE, "Unknown error");
-                                               offsets[0] = start_offset;
-                                               offsets[1] = start_offset + 1;
+                                               php_error_docref(NULL 
TSRMLS_CC, E_WARNING, "Unknown error");
+                                               RETURN_FALSE;
                                        }
                                } else {
                                        offsets[0] = start_offset;



-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to