nlopess Sat Mar 8 11:58:12 2008 UTC Modified files: (Branch: PHP_5_3) /php-src/ext/pcre php_pcre.c Log: implement #44336: optimize utf8 string matching add PREG_BAD_UTF8_OFFSET_ERROR constant http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.168.2.9.2.21.2.12&r2=1.168.2.9.2.21.2.13&diff_format=u Index: php-src/ext/pcre/php_pcre.c diff -u php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12 php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.13 --- php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12 Mon Mar 3 11:11:43 2008 +++ php-src/ext/pcre/php_pcre.c Sat Mar 8 11:58:12 2008 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.12 2008/03/03 11:11:43 dmitry Exp $ */ +/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.13 2008/03/08 11:58:12 nlopess Exp $ */ #include "php.h" #include "php_ini.h" @@ -48,7 +48,8 @@ PHP_PCRE_INTERNAL_ERROR, PHP_PCRE_BACKTRACK_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR, - PHP_PCRE_BAD_UTF8_ERROR + PHP_PCRE_BAD_UTF8_ERROR, + PHP_PCRE_BAD_UTF8_OFFSET_ERROR }; @@ -72,6 +73,10 @@ preg_code = PHP_PCRE_BAD_UTF8_ERROR; break; + case PCRE_ERROR_BADUTF8_OFFSET: + preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; + break; + default: preg_code = PHP_PCRE_INTERNAL_ERROR; break; @@ -145,6 +150,7 @@ REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); return SUCCESS; @@ -614,7 +620,10 @@ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); - /* Check for too many substrings condition. */ + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; @@ -1034,7 +1043,10 @@ /* Execute the regular expression. */ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); - + + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1472,6 +1484,9 @@ subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1535,9 +1550,8 @@ subject_len, start_offset, exoptions, offsets, size_offsets); if (count < 1) { - php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error"); - offsets[0] = start_offset; - offsets[1] = start_offset + 1; + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); + RETURN_FALSE; } } else { offsets[0] = start_offset;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php