helly Thu Jul 20 22:53:07 2006 UTC Modified files: (Branch: PHP_5_2) /php-src/ext/pcre php_pcre.c php_pcre.h /php-src/ext/pcre/tests grep2.phpt match_flags3.phpt preg_replace.phpt split.phpt Log: - MFH Clean up (after consulting Andrei) . Change the handlers SPL uses to php_pcre_*_impl(pcre_cache_entry*,....) . All refactored funcs (match, split, replace, grep) use the above . Change (zend|php)_error() to php_error_docref() . Move from old to new param parsing api . Fix memleaks in unicode mode
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.168.2.9.2.8&r2=1.168.2.9.2.9&diff_format=u Index: php-src/ext/pcre/php_pcre.c diff -u php-src/ext/pcre/php_pcre.c:1.168.2.9.2.8 php-src/ext/pcre/php_pcre.c:1.168.2.9.2.9 --- php-src/ext/pcre/php_pcre.c:1.168.2.9.2.8 Sun Jul 16 21:09:30 2006 +++ php-src/ext/pcre/php_pcre.c Thu Jul 20 22:53:07 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_pcre.c,v 1.168.2.9.2.8 2006/07/16 21:09:30 helly Exp $ */ +/* $Id: php_pcre.c,v 1.168.2.9.2.9 2006/07/20 22:53:07 helly Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -174,19 +174,20 @@ /* {{{ pcre_get_compiled_regex_cache */ -PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) { pcre *re = NULL; - int coptions = 0; - int soptions = 0; - const char *error; - int erroffset; - char delimiter; + pcre_extra *extra; + int coptions = 0; + int soptions = 0; + const char *error; + int erroffset; + char delimiter; char start_delimiter; char end_delimiter; - char *p, *pp; + char *p, *pp; char *pattern; - int do_study = 0; + int do_study = 0; int poptions = 0; unsigned const char *tables = NULL; #if HAVE_SETLOCALE @@ -209,9 +210,6 @@ #if HAVE_SETLOCALE if (!strcmp(pce->locale, locale)) { #endif - *extra = pce->extra; - *preg_options = pce->preg_options; - *compile_options = pce->compile_options; return pce; #if HAVE_SETLOCALE } @@ -285,9 +283,6 @@ /* Move on to the options */ pp++; - /* Clear out preg options */ - *preg_options = 0; - /* Parse through the options, setting appropriate flags. Display a warning if we encounter an unknown modifier. */ while (*pp != 0) { @@ -344,18 +339,17 @@ /* If study option was specified, study the pattern and store the result in extra for passing to pcre_exec. */ if (do_study) { - *extra = pcre_study(re, soptions, &error); - if (*extra) { - (*extra)->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; + extra = pcre_study(re, soptions, &error); + if (extra) { + extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; } if (error != NULL) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Error while studying pattern"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern"); } + } else { + extra = NULL; } - *preg_options = poptions; - *compile_options = coptions; - efree(pattern); /* @@ -370,7 +364,7 @@ /* Store the compiled pattern and extra info in the cache. */ new_entry.re = re; - new_entry.extra = *extra; + new_entry.extra = extra; new_entry.preg_options = poptions; new_entry.compile_options = coptions; #if HAVE_SETLOCALE @@ -388,8 +382,14 @@ */ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) { - int compile_options; - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex), extra, preg_options, &compile_options TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + + if (extra) { + *extra = pce ? pce->extra : NULL; + } + if (preg_options) { + *preg_options = pce ? pce->preg_options : 0; + } return pce ? pce->re : NULL; } @@ -399,7 +399,17 @@ */ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) { - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex), extra, preg_options, compile_options TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + + if (extra) { + *extra = pce ? pce->extra : NULL; + } + if (preg_options) { + *preg_options = pce ? pce->preg_options : 0; + } + if (compile_options) { + *compile_options = pce ? pce->compile_options : 0; + } return pce ? pce->re : NULL; } @@ -429,72 +439,51 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ { /* parameters */ - char *regex; /* Regular expression */ - char *subject; /* String to match against */ - int regex_len; - int subject_len; - pcre *re = NULL; /* Compiled regular expression */ - pcre_extra *extra = NULL; /* Holds results of studying pattern */ - zval *subpats = NULL; /* Array for subpatterns */ - long flags; /* Match control flags */ - int preg_options = 0; /* Custom preg options */ - long start_offset = 0; /* Where the new search starts */ - int subpats_order = 0; /* Order of subpattern matches */ - int offset_capture = 0;/* Capture match offsets: yes/no */ + char *regex; /* Regular expression */ + char *subject; /* String to match against */ + int regex_len; + int subject_len; + pcre_cache_entry *pce; /* Compiled regular expression */ + zval *subpats = NULL; /* Array for subpatterns */ + long flags = 0; /* Match control flags */ + long start_offset = 0; /* Where the new search starts */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len, &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { RETURN_FALSE; } - if (global) - subpats_order = PREG_PATTERN_ORDER; - - if (ZEND_NUM_ARGS() > 3) { - offset_capture = flags & PREG_OFFSET_CAPTURE; - /* - * subpats_order is pre-set to pattern mode so we change it only if - * necessary. - */ - if (flags & 0xff) { - subpats_order = flags & 0xff; - } - if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || - (!global && subpats_order != 0)) { - zend_error(E_WARNING, "Wrong value for parameter 4 in call to %s()", get_active_function_name(TSRMLS_C)); - return; - } - } - /* Compile regex or get it from cache. */ - if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; } - php_pcre_match(re, extra, subject, subject_len, return_value, subpats, - global, preg_options, start_offset, subpats_order, offset_capture TSRMLS_CC); + php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, + global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC); } -PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int subject_len, zval *return_value, - zval *subpats, int global, int preg_options, long start_offset, int subpats_order, int offset_capture TSRMLS_DC) +PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, + zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC) { - zval *result_set, /* Holds a set of subpatterns after a global match */ **match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ + pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ - int exoptions = 0; /* Execution options */ - int count = 0; /* Count of matched subpatterns */ - int *offsets; /* Array of subpattern offsets */ + int exoptions = 0; /* Execution options */ + int count = 0; /* Count of matched subpatterns */ + int *offsets; /* Array of subpattern offsets */ int num_subpats; /* Number of captured subpatterns */ - int size_offsets; /* Size of the offsets array */ - int matched; /* Has anything matched */ + int size_offsets; /* Size of the offsets array */ + int matched; /* Has anything matched */ int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Holds list of subpatterns */ char *match; /* The current match */ char **subpat_names = NULL;/* Array for named subpatterns */ int i, rc; + int subpats_order; /* Order of subpattern matches */ + int offset_capture; /* Capture match offsets: yes/no */ /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { @@ -502,6 +491,27 @@ array_init(subpats); } + subpats_order = global ? PREG_PATTERN_ORDER : 0; + + if (use_flags) { + offset_capture = flags & PREG_OFFSET_CAPTURE; + + /* + * subpats_order is pre-set to pattern mode so we change it only if + * necessary. + */ + if (flags & 0xff) { + subpats_order = flags & 0xff; + } + if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || + (!global && subpats_order != 0)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified"); + return; + } + } else { + offset_capture = 0; + } + /* Negative offset counts from the end of the string. */ if (start_offset < 0) { start_offset = subject_len + start_offset; @@ -518,10 +528,9 @@ extra->match_limit_recursion = PCRE_G(recursion_limit); /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); RETURN_FALSE; } num_subpats++; @@ -540,10 +549,9 @@ char *name_table; unsigned short name_idx; - rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); efree(offsets); efree(subpat_names); RETURN_FALSE; @@ -552,12 +560,11 @@ int rc1, rc2; long dummy_l; double dummy_d; - rc1 = pcre_fullinfo(re, extra, PCRE_INFO_NAMETABLE, &name_table); - rc2 = pcre_fullinfo(re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); + rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); + rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); rc = rc2 ? rc2 : rc1; if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); efree(offsets); efree(subpat_names); RETURN_FALSE; @@ -567,7 +574,7 @@ name_idx = 0xff * name_table[0] + name_table[1]; subpat_names[name_idx] = name_table + 2; if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), &dummy_l, &dummy_d, 0) > 0) { - php_error(E_WARNING, "%s: numeric named subpatterns are not allowed", get_active_function_name(TSRMLS_C)); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); efree(offsets); efree(subpat_names); RETURN_FALSE; @@ -593,12 +600,12 @@ do { /* Execute the regular expression. */ - count = pcre_exec(re, extra, subject, subject_len, start_offset, + count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); /* Check for too many substrings condition. */ if (count == 0) { - zend_error(E_NOTICE, "Matched, but too many substrings"); + php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } @@ -613,8 +620,7 @@ if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { efree(subpat_names); efree(offsets); - efree(re); - zend_error(E_WARNING, "Get subpatterns list failed"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed"); RETURN_FALSE; } @@ -843,7 +849,7 @@ /* If found a backreference.. */ if ('\\' == *walk || '$' == *walk) { smart_str_appendl(&code, segment, walk - segment); - if (walk_last == '\\') { + if (walk_last == '\\') { code.c[code.len-1] = *walk++; segment = walk; walk_last = 0; @@ -912,14 +918,26 @@ zval *replace_val, int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) { - pcre *re = NULL; /* Compiled regular expression */ - pcre_extra *extra = NULL; /* Holds results of studying */ + pcre_cache_entry *pce; /* Compiled regular expression */ + + /* Compile regex or get it from cache. */ + if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + return NULL; + } + + return php_pcre_replace_impl(pce, subject, subject_len, replace_val, + is_callable_replace, result_len, limit, replace_count TSRMLS_CC); +} + +PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, + int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) +{ + pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ - int exoptions = 0; /* Execution options */ - int preg_options = 0; /* Custom preg options */ - int count = 0; /* Count of matched subpatterns */ - int *offsets; /* Array of subpattern offsets */ - int size_offsets; /* Size of the offsets array */ + int exoptions = 0; /* Execution options */ + int count = 0; /* Count of matched subpatterns */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ int new_len; /* Length of needed storage */ int alloc_len; /* Actual allocated length */ int eval_result_len=0; /* Length of the eval'ed or @@ -942,11 +960,6 @@ walk_last; /* Last walked character */ int rc; - /* Compile regex or get it from cache. */ - if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options TSRMLS_CC)) == NULL) { - return NULL; - } - if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; @@ -954,10 +967,10 @@ extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); - eval = preg_options & PREG_REPLACE_EVAL; + eval = pce->preg_options & PREG_REPLACE_EVAL; if (is_callable_replace) { if (eval) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "/e modifier cannot be used with replacement callback"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback"); return NULL; } } else { @@ -967,10 +980,9 @@ } /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); return NULL; } size_offsets = (size_offsets + 1) * 3; @@ -987,7 +999,7 @@ while (1) { /* Execute the regular expression. */ - count = pcre_exec(re, extra, subject, subject_len, start_offset, + count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); /* Check for too many substrings condition. */ @@ -1004,7 +1016,7 @@ } /* Set the match location in subject */ match = subject + offsets[0]; - + new_len = *result_len + offsets[0] - start_offset; /* part before the match */ /* If evaluating, do it and add the return string's length */ @@ -1210,13 +1222,13 @@ } else { result = php_pcre_replace(Z_STRVAL_P(regex), Z_STRLEN_P(regex), - Z_STRVAL_PP(subject), + Z_STRVAL_PP(subject), Z_STRLEN_PP(subject), replace, is_callable_replace, result_len, limit, - replace_count TSRMLS_CC); + replace_count TSRMLS_CC); return result; } } @@ -1231,15 +1243,15 @@ **subject, **limit, **subject_entry, - **zcount; + **zcount; char *result; int result_len; int limit_val = -1; char *string_key; ulong num_key; - char *callback_name = NULL; - int replace_count=0; - int *replace_count_ptr=NULL; + char *callback_name; + int replace_count=0; + int *replace_count_ptr=NULL; /* Get function parameters and do error-checking. */ if (ZEND_NUM_ARGS() < 3 || ZEND_NUM_ARGS() > 5 || @@ -1256,7 +1268,7 @@ convert_to_string_ex(replace); if (is_callable_replace) { if (!zend_is_callable(*replace, 0, &callback_name)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "requires argument 2, '%s', to be a valid callback", callback_name); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name); efree(callback_name); *return_value = **subject; zval_copy_ctor(return_value); @@ -1337,63 +1349,39 @@ Split string into an array using a perl-style regular expression as a delimiter */ PHP_FUNCTION(preg_split) { - zval **regex, /* Regular expression to split by */ - **subject, /* Subject string to split */ - **limit, /* Number of pieces to return */ - **flags; - int preg_options = 0; /* Custom preg options */ - int coptions = 0; /* Custom preg options */ - int argc; /* Argument count */ - int limit_val = -1; /* Integer value of limit */ - int no_empty = 0; /* If NO_EMPTY flag is set */ - int delim_capture = 0; /* If delimiters should be captured */ - int offset_capture = 0;/* If offsets should be captured */ - pcre *re; /* Compiled regular expression */ - pcre_extra *extra = NULL; /* Holds results of studying */ + char *regex; /* Regular expression */ + char *subject; /* String to match against */ + int regex_len; + int subject_len; + long limit_val = -1;/* Integer value of limit */ + long flags = 0; /* Match control flags */ + pcre_cache_entry *pce; /* Compiled regular expression */ /* Get function parameters and do error checking */ - argc = ZEND_NUM_ARGS(); - if (argc < 2 || argc > 4 || zend_get_parameters_ex(argc, ®ex, &subject, &limit, &flags) == FAILURE) { - WRONG_PARAM_COUNT; - } - - if (argc > 2) { - convert_to_long_ex(limit); - limit_val = Z_LVAL_PP(limit); - if (limit_val == 0) - limit_val = -1; - - if (argc > 3) { - convert_to_long_ex(flags); - no_empty = Z_LVAL_PP(flags) & PREG_SPLIT_NO_EMPTY; - delim_capture = Z_LVAL_PP(flags) & PREG_SPLIT_DELIM_CAPTURE; - offset_capture = Z_LVAL_PP(flags) & PREG_SPLIT_OFFSET_CAPTURE; - } + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len, + &subject, &subject_len, &limit_val, &flags) == FAILURE) { + RETURN_FALSE; } - /* Make sure we're dealing with strings */ - convert_to_string_ex(regex); - convert_to_string_ex(subject); - /* Compile regex or get it from cache. */ - if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra, &preg_options, &coptions TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; } - php_pcre_split(re, extra, Z_STRVAL_PP(subject), Z_STRLEN_PP(subject), return_value, - coptions, limit_val, no_empty, delim_capture, offset_capture TSRMLS_CC); + php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC); } /* {{{ php_pcre_split */ -PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int subject_len, zval *return_value, - int coptions, int limit_val, int no_empty, int delim_capture, int offset_capture TSRMLS_DC) +PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, + long limit_val, long flags TSRMLS_DC) { + pcre_extra *extra = NULL; /* Holds results of studying */ pcre *re_bump = NULL; /* Regex instance for empty matches */ pcre_extra *extra_bump = NULL; /* Almost dummy */ pcre_extra extra_data; /* Used locally for exec options */ - int *offsets; /* Array of subpattern offsets */ - int size_offsets; /* Size of the offsets array */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ int exoptions = 0; /* Execution options */ int count = 0; /* Count of matched subpatterns */ int start_offset; /* Where the new search starts */ @@ -1402,6 +1390,17 @@ char *match, /* The current match */ *last_match; /* Location of last match */ int rc; + int no_empty; /* If NO_EMPTY flag is set */ + int delim_capture; /* If delimiters should be captured */ + int offset_capture; /* If offsets should be captured */ + + no_empty = flags & PREG_SPLIT_NO_EMPTY; + delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; + offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; + + if (limit_val == 0) { + limit_val = -1; + } if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; @@ -1414,10 +1413,9 @@ array_init(return_value); /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); RETURN_FALSE; } size_offsets = (size_offsets + 1) * 3; @@ -1432,7 +1430,7 @@ /* Get next piece if no limit or limit not yet reached and something matched*/ while ((limit_val == -1 || limit_val > 1)) { - count = pcre_exec(re, extra, subject, + count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); @@ -1452,7 +1450,7 @@ /* Add (match, offset) pair to the return value */ add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL); } else { - /* Add the piece to the return value */ + /* Add the piece to the return value */ add_next_index_stringl(return_value, last_match, &subject[offsets[0]]-last_match, 1); } @@ -1463,7 +1461,7 @@ } last_match = &subject[offsets[1]]; - next_offset = offsets[1]; + next_offset = offsets[1]; if (delim_capture) { int i, match_len; @@ -1487,7 +1485,7 @@ the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { - if (coptions & PCRE_UTF8) { + if (pce->compile_options & PCRE_UTF8) { if (re_bump == NULL) { int dummy; @@ -1542,48 +1540,43 @@ } /* }}} */ -/* {{{ proto string preg_quote(string str, string delim_char) +/* {{{ proto string preg_quote(string str [, string delim_char]) Quote regular expression characters plus an optional character */ PHP_FUNCTION(preg_quote) { - zval **in_str_arg; /* Input string argument */ - zval **delim; /* Additional delimiter argument */ - char *in_str, /* Input string */ - *in_str_end, /* End of the input string */ - *out_str, /* Output string with quoted characters */ + int in_str_len; + char *in_str; /* Input string argument */ + char *in_str_end; /* End of the input string */ + int delim_len; + char *delim = NULL; /* Additional delimiter argument */ + char *out_str, /* Output string with quoted characters */ *p, /* Iterator for input string */ *q, /* Iterator for output string */ delim_char=0, /* Delimiter character to be quoted */ - c; /* Current character */ + c; /* Current character */ zend_bool quote_delim = 0; /* Whether to quote additional delim char */ /* Get the arguments and check for errors */ - if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2 || - zend_get_parameters_ex(ZEND_NUM_ARGS(), &in_str_arg, &delim) == FAILURE) { - WRONG_PARAM_COUNT; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len, + &delim, &delim_len) == FAILURE) { + return; } - /* Make sure we're working with strings */ - convert_to_string_ex(in_str_arg); - in_str = Z_STRVAL_PP(in_str_arg); - in_str_end = Z_STRVAL_PP(in_str_arg) + Z_STRLEN_PP(in_str_arg); + in_str_end = in_str + in_str_len; /* Nothing to do if we got an empty string */ if (in_str == in_str_end) { RETURN_EMPTY_STRING(); } - if (ZEND_NUM_ARGS() == 2) { - convert_to_string_ex(delim); - if (Z_STRLEN_PP(delim) > 0) { - delim_char = Z_STRVAL_PP(delim)[0]; - quote_delim = 1; - } + if (delim && *delim) { + delim_char = delim[0]; + quote_delim = 1; } /* Allocate enough memory so that even if each character is quoted, we won't run out of room */ - out_str = safe_emalloc(4, Z_STRLEN_PP(in_str_arg), 1); + out_str = safe_emalloc(4, in_str_len, 1); /* Go through the string and quote necessary characters */ for(p = in_str, q = out_str; p != in_str_end; p++) { @@ -1633,54 +1626,47 @@ } /* }}} */ -/* {{{ proto array preg_grep(string regex, array input) +/* {{{ proto array preg_grep(string regex, array input [, int flags]) Searches array and returns entries which match regex */ PHP_FUNCTION(preg_grep) { - zval **regex, /* Regular expression */ - **input, /* Input array */ - **flags, - **entry; /* An entry in the input array */ - pcre *re = NULL; /* Compiled regular expression */ - pcre_extra *extra = NULL; /* Holds results of studying */ - pcre_extra extra_data; /* Used locally for exec options */ - int preg_options = 0; /* Custom preg options */ - int *offsets; /* Array of subpattern offsets */ - int size_offsets; /* Size of the offsets array */ - int count = 0; /* Count of matched subpatterns */ - char *string_key; - ulong num_key; - zend_bool invert = 0; /* Whether to return non-matching - entries */ - int rc; - + char *regex; /* Regular expression */ + int regex_len; + zval *input; /* Input array */ + long flags = 0; /* Match control flags */ + pcre_cache_entry *pce; /* Compiled regular expression */ + /* Get arguments and do error checking */ - - if (ZEND_NUM_ARGS() < 2 || ZEND_NUM_ARGS() > 3 || - zend_get_parameters_ex(ZEND_NUM_ARGS(), ®ex, &input, &flags) == FAILURE) { - WRONG_PARAM_COUNT; - } - - if (Z_TYPE_PP(input) != IS_ARRAY) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Second argument to preg_grep() should be an array"); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len, + &input, &flags) == FAILURE) { return; } - - SEPARATE_ZVAL(input); - - /* Make sure regex is a string */ - convert_to_string_ex(regex); - - if (ZEND_NUM_ARGS() > 2) { - convert_to_long_ex(flags); - invert = (Z_LVAL_PP(flags) & PREG_GREP_INVERT) ? 1 : 0; - } /* Compile regex or get it from cache. */ - if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; } + + php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC); +} +PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, + long flags TSRMLS_DC) +{ + zval **entry; /* An entry in the input array */ + pcre_extra *extra = pce->extra;/* Holds results of studying */ + pcre_extra extra_data; /* Used locally for exec options */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ + int count = 0; /* Count of matched subpatterns */ + char *string_key; + ulong num_key; + zend_bool invert; /* Whether to return non-matching + entries */ + int rc; + + invert = flags & PREG_GREP_INVERT ? 1 : 0; + if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; @@ -1689,10 +1675,9 @@ extra->match_limit_recursion = PCRE_G(recursion_limit); /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); if (rc < 0) { - php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d", - get_active_function_name(TSRMLS_C), rc); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); RETURN_FALSE; } size_offsets = (size_offsets + 1) * 3; @@ -1704,13 +1689,13 @@ PCRE_G(error_code) = PHP_PCRE_NO_ERROR; /* Go through the input array */ - zend_hash_internal_pointer_reset(Z_ARRVAL_PP(input)); - while(zend_hash_get_current_data(Z_ARRVAL_PP(input), (void **)&entry) == SUCCESS) { + zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); + while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { convert_to_string_ex(entry); /* Perform the match */ - count = pcre_exec(re, extra, Z_STRVAL_PP(entry), + count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), 0, 0, offsets, size_offsets); @@ -1729,7 +1714,7 @@ (*entry)->refcount++; /* Add to return array */ - switch (zend_hash_get_current_key(Z_ARRVAL_PP(input), &string_key, &num_key, 0)) + switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) { case HASH_KEY_IS_STRING: zend_hash_update(Z_ARRVAL_P(return_value), string_key, @@ -1743,7 +1728,7 @@ } } - zend_hash_move_forward(Z_ARRVAL_PP(input)); + zend_hash_move_forward(Z_ARRVAL_P(input)); } /* Clean up */ http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.h?r1=1.41.2.1.2.3&r2=1.41.2.1.2.4&diff_format=u Index: php-src/ext/pcre/php_pcre.h diff -u php-src/ext/pcre/php_pcre.h:1.41.2.1.2.3 php-src/ext/pcre/php_pcre.h:1.41.2.1.2.4 --- php-src/ext/pcre/php_pcre.h:1.41.2.1.2.3 Sun Jul 16 21:09:30 2006 +++ php-src/ext/pcre/php_pcre.h Thu Jul 20 22:53:07 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_pcre.h,v 1.41.2.1.2.3 2006/07/16 21:09:30 helly Exp $ */ +/* $Id: php_pcre.h,v 1.41.2.1.2.4 2006/07/20 22:53:07 helly Exp $ */ #ifndef PHP_PCRE_H #define PHP_PCRE_H @@ -41,13 +41,9 @@ PHP_FUNCTION(preg_quote); PHP_FUNCTION(preg_grep); -PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC); +PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC); PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options TSRMLS_DC); PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *coptions TSRMLS_DC); -PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int subject_len, zval *return_value, - int coptions, int limit_val, int no_empty, int delim_capture, int offset_capture TSRMLS_DC); -PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int subject_len, zval *return_value, - zval *subpats, int global, int preg_options, long start_offset, int subpats_order, int offset_capture TSRMLS_DC); extern zend_module_entry pcre_module_entry; #define pcre_module_ptr &pcre_module_entry @@ -64,7 +60,19 @@ int refcount; } pcre_cache_entry; -PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC); +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC); + +PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, + zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC); + +PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, + int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC); + +PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, + long limit_val, long flags TSRMLS_DC); + +PHPAPI void php_pcre_grep_impl( pcre_cache_entry *pce, zval *input, zval *return_value, + long flags TSRMLS_DC); ZEND_BEGIN_MODULE_GLOBALS(pcre) HashTable pcre_cache; http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/grep2.phpt?r1=1.1.2.1&r2=1.1.2.2&diff_format=u Index: php-src/ext/pcre/tests/grep2.phpt diff -u php-src/ext/pcre/tests/grep2.phpt:1.1.2.1 php-src/ext/pcre/tests/grep2.phpt:1.1.2.2 --- php-src/ext/pcre/tests/grep2.phpt:1.1.2.1 Sun Jul 16 19:35:09 2006 +++ php-src/ext/pcre/tests/grep2.phpt Thu Jul 20 22:53:07 2006 @@ -19,10 +19,10 @@ ?> --EXPECTF-- -Warning: Wrong parameter count for preg_grep() in %sgrep2.php on line 3 +Warning: preg_grep() expects at most 3 parameters, 4 given in %sgrep2.php on line 3 NULL -Warning: preg_grep(): Second argument to preg_grep() should be an array in %sgrep2.php on line 4 +Warning: preg_grep() expects parameter 2 to be array, integer given in %sgrep2.php on line 4 NULL Warning: preg_grep(): Compilation failed: nothing to repeat at offset 0 in %sgrep2.php on line 5 http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/match_flags3.phpt?r1=1.1.2.2&r2=1.1.2.3&diff_format=u Index: php-src/ext/pcre/tests/match_flags3.phpt diff -u php-src/ext/pcre/tests/match_flags3.phpt:1.1.2.2 php-src/ext/pcre/tests/match_flags3.phpt:1.1.2.3 --- php-src/ext/pcre/tests/match_flags3.phpt:1.1.2.2 Mon Jul 3 14:46:44 2006 +++ php-src/ext/pcre/tests/match_flags3.phpt Thu Jul 20 22:53:07 2006 @@ -18,8 +18,9 @@ ?> --EXPECTF-- -Warning: Wrong value for parameter 4 in call to preg_match() in %smatch_flags3.php on line 3 -NULL + +Warning: preg_match(): Empty regular expression in %smatch_flags3.php on line 3 +bool(false) int(1) array(1) { [0]=> @@ -41,5 +42,5 @@ } } -Warning: preg_match: numeric named subpatterns are not allowed in %smatch_flags3.php on line 14 +Warning: preg_match(): Numeric named subpatterns are not allowed in %smatch_flags3.php on line 14 bool(false) http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/preg_replace.phpt?r1=1.1.2.1&r2=1.1.2.2&diff_format=u Index: php-src/ext/pcre/tests/preg_replace.phpt diff -u php-src/ext/pcre/tests/preg_replace.phpt:1.1.2.1 php-src/ext/pcre/tests/preg_replace.phpt:1.1.2.2 --- php-src/ext/pcre/tests/preg_replace.phpt:1.1.2.1 Mon Jul 3 13:04:45 2006 +++ php-src/ext/pcre/tests/preg_replace.phpt Thu Jul 20 22:53:07 2006 @@ -18,8 +18,8 @@ string(4) "abcd" string(8) "zaab2k3l" -Warning: preg_replace_callback(): requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8 +Warning: preg_replace_callback(): Requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8 string(0) "" -Warning: preg_replace_callback(): /e modifier cannot be used with replacement callback in %spreg_replace.php on line 10 +Warning: preg_replace_callback(): Modifier /e cannot be used with replacement callback in %spreg_replace.php on line 10 NULL http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/split.phpt?r1=1.1.2.1&r2=1.1.2.2&diff_format=u Index: php-src/ext/pcre/tests/split.phpt diff -u php-src/ext/pcre/tests/split.phpt:1.1.2.1 php-src/ext/pcre/tests/split.phpt:1.1.2.2 --- php-src/ext/pcre/tests/split.phpt:1.1.2.1 Sun Jul 16 19:35:09 2006 +++ php-src/ext/pcre/tests/split.phpt Thu Jul 20 22:53:07 2006 @@ -17,8 +17,8 @@ ?> --EXPECTF-- -Warning: Wrong parameter count for preg_split() in %ssplit.php on line 3 -NULL +Warning: preg_split() expects at least 2 parameters, 0 given in %ssplit.php on line 3 +bool(false) Warning: preg_split(): Compilation failed: nothing to repeat at offset 0 in %ssplit.php on line 4 bool(false)
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php