moriyoshi Sat Jan 31 17:36:34 2004 EDT
Modified files:
/php-src NEWS
/php-src/ext/pcre php_pcre.c php_pcre.h
Log:
- Fix bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into octets).
http://cvs.php.net/diff.php/php-src/NEWS?r1=1.1592&r2=1.1593&ty=u
Index: php-src/NEWS
diff -u php-src/NEWS:1.1592 php-src/NEWS:1.1593
--- php-src/NEWS:1.1592 Fri Jan 30 04:21:05 2004
+++ php-src/NEWS Sat Jan 31 17:36:32 2004
@@ -14,6 +14,8 @@
(Derick)
- Fixed problems with longlong values in mysqli. (Georg)
- Fixed class name case preserving of user defined classes. (Marcus)
+- Fixed bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into
+ octets). (Moriyoshi)
- Fixed bug #27042 (SPL: SeekableIterator seek() broken). (Marcus)
- Fixed bug #27008 (Every class method can be called as static). (Marcus)
- Fixed bug #26938 (exec() has problems reading long lines).
http://cvs.php.net/diff.php/php-src/ext/pcre/php_pcre.c?r1=1.153&r2=1.154&ty=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.153 php-src/ext/pcre/php_pcre.c:1.154
--- php-src/ext/pcre/php_pcre.c:1.153 Sat Jan 31 16:56:42 2004
+++ php-src/ext/pcre/php_pcre.c Sat Jan 31 17:36:33 2004
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.153 2004/01/31 21:56:42 moriyoshi Exp $ */
+/* $Id: php_pcre.c,v 1.154 2004/01/31 22:36:33 moriyoshi Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -132,7 +132,17 @@
/* {{{ pcre_get_compiled_regex
*/
-PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*preg_options TSRMLS_DC) {
+PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*preg_options TSRMLS_DC)
+{
+ int compile_options; /* the _ex variant always writes through this pointer; value is discarded here */
+ return pcre_get_compiled_regex_ex(regex, extra, preg_options,
&compile_options TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ pcre_get_compiled_regex_ex
+ */
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *compile_options TSRMLS_DC)
+{
pcre *re = NULL;
int coptions = 0;
int soptions = 0;
@@ -162,6 +172,7 @@
#endif
*extra = pce->extra;
*preg_options = pce->preg_options;
+ *compile_options = pce->compile_options;
return pce->re;
#if HAVE_SETLOCALE
}
@@ -236,7 +247,7 @@
/* Clear out preg options */
*preg_options = 0;
-
+
/* Parse through the options, setting appropriate flags. Display
a warning if we encounter an unknown modifier. */
while (*pp != 0) {
@@ -297,13 +308,15 @@
}
*preg_options = poptions;
-
+ *compile_options = coptions;
+
efree(pattern);
/* Store the compiled pattern and extra info in the cache. */
new_entry.re = re;
new_entry.extra = *extra;
new_entry.preg_options = poptions;
+ new_entry.compile_options = coptions;
#if HAVE_SETLOCALE
new_entry.locale = pestrdup(locale, 1);
new_entry.tables = tables;
@@ -1168,11 +1181,14 @@
**limit, /* Number of
pieces to return */
**flags;
pcre *re = NULL; /* Compiled regular
expression */
+ pcre *re_bump = NULL; /* Regex instance for empty
matches */
pcre_extra *extra = NULL; /* Holds results of studying */
+ pcre_extra *extra_bump = NULL; /* Almost dummy */
int *offsets; /* Array of
subpattern offsets */
int size_offsets; /* Size of the offsets
array */
int exoptions = 0; /* Execution options */
int preg_options = 0; /* Custom preg options
*/
+ int coptions = 0; /* PCRE compile options (e.g. PCRE_UTF8) */
int argc; /* Argument
count */
int limit_val = -1; /* Integer value of
limit */
int no_empty = 0; /* If NO_EMPTY flag is
set */
@@ -1210,7 +1226,7 @@
convert_to_string_ex(subject);
/* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options
TSRMLS_CC)) == NULL) {
+ if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra,
&preg_options, &coptions TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
@@ -1284,8 +1300,26 @@
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the
string. */
if (g_notempty != 0 && start_offset < Z_STRLEN_PP(subject)) {
- offsets[0] = start_offset;
- offsets[1] = start_offset + 1;
+ if (coptions & PCRE_UTF8) {
+ if (re_bump == NULL) {
+ int dummy;
+
+ if ((re_bump =
pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
+ RETURN_FALSE;
+ }
+ }
+ count = pcre_exec(re_bump, extra_bump,
Z_STRVAL_PP(subject),
+ Z_STRLEN_PP(subject),
start_offset,
+ exoptions, offsets,
size_offsets);
+ if (count < 1) {
+ php_error_docref(NULL
TSRMLS_CC,E_NOTICE, "Unknown error");
+ offsets[0] = start_offset;
+ offsets[1] = start_offset + 1;
+ }
+ } else {
+ offsets[0] = start_offset;
+ offsets[1] = start_offset + 1;
+ }
} else
break;
}
http://cvs.php.net/diff.php/php-src/ext/pcre/php_pcre.h?r1=1.37&r2=1.38&ty=u
Index: php-src/ext/pcre/php_pcre.h
diff -u php-src/ext/pcre/php_pcre.h:1.37 php-src/ext/pcre/php_pcre.h:1.38
--- php-src/ext/pcre/php_pcre.h:1.37 Thu Jan 8 12:32:39 2004
+++ php-src/ext/pcre/php_pcre.h Sat Jan 31 17:36:33 2004
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.h,v 1.37 2004/01/08 17:32:39 sniper Exp $ */
+/* $Id: php_pcre.h,v 1.38 2004/01/31 22:36:33 moriyoshi Exp $ */
#ifndef PHP_PCRE_H
#define PHP_PCRE_H
@@ -43,6 +43,7 @@
PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit
TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options
TSRMLS_DC);
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *coptions TSRMLS_DC);
extern zend_module_entry pcre_module_entry;
#define pcre_module_ptr &pcre_module_entry
@@ -51,6 +52,7 @@
pcre *re;
pcre_extra *extra;
int preg_options;
+ int compile_options;
#if HAVE_SETLOCALE
char *locale;
unsigned const char *tables;
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php