Needs to be documented: please use [DOC] in your CVS commit message in
the future, thanks! :-)
-------- Original Message --------
Subject: cvs: php-src(PHP_5_3) /ext/pcre php_pcre.c
Date: Sat, 08 Mar 2008 11:58:12 -0000
From: [EMAIL PROTECTED] ("Nuno Lopes")
To: [EMAIL PROTECTED]
Newsgroups: php.cvs
Followup-To: php.internals
nlopess Sat Mar 8 11:58:12 2008 UTC
Modified files: (Branch: PHP_5_3)
/php-src/ext/pcre php_pcre.c
Log:
implement #44336: optimize utf8 string matching
add PREG_BAD_UTF8_OFFSET_ERROR constant
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.168.2.9.2.21.2.12&r2=1.168.2.9.2.21.2.13&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12
php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.13
--- php-src/ext/pcre/php_pcre.c:1.168.2.9.2.21.2.12 Mon Mar 3 11:11:43
2008
+++ php-src/ext/pcre/php_pcre.c Sat Mar 8 11:58:12 2008
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.12 2008/03/03 11:11:43 dmitry Exp
$ */
+/* $Id: php_pcre.c,v 1.168.2.9.2.21.2.13 2008/03/08 11:58:12 nlopess
Exp $ */
#include "php.h"
#include "php_ini.h"
@@ -48,7 +48,8 @@
PHP_PCRE_INTERNAL_ERROR,
PHP_PCRE_BACKTRACK_LIMIT_ERROR,
PHP_PCRE_RECURSION_LIMIT_ERROR,
- PHP_PCRE_BAD_UTF8_ERROR
+ PHP_PCRE_BAD_UTF8_ERROR,
+ PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};
@@ -72,6 +73,10 @@
preg_code = PHP_PCRE_BAD_UTF8_ERROR;
break;
+ case PCRE_ERROR_BADUTF8_OFFSET:
+ preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
+ break;
+
default:
preg_code = PHP_PCRE_INTERNAL_ERROR;
break;
@@ -145,6 +150,7 @@
REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR",
PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR",
PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR,
CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR",
PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(),
CONST_CS | CONST_PERSISTENT);
return SUCCESS;
@@ -614,7 +620,10 @@
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
- /* Check for too many substrings condition. */
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
+ /* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many
substrings");
count = size_offsets/3;
@@ -1034,7 +1043,10 @@
/* Execute the regular expression. */
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
-
+
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many
substrings");
@@ -1472,6 +1484,9 @@
subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many
substrings");
@@ -1535,9 +1550,8 @@
subject_len, start_offset,
exoptions, offsets, size_offsets);
if (count < 1) {
- php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
- offsets[0] = start_offset;
- offsets[1] = start_offset + 1;
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
+ RETURN_FALSE;
}
} else {
offsets[0] = start_offset;
--
Edward Z. Yang GnuPG: 0x869C48DA
HTML Purifier <http://htmlpurifier.org> Anti-XSS Filter
[[ 3FA8 E9A9 7385 B691 A6FC B3CB A933 BE7D 869C 48DA ]]