Hi Andrei, all: (This is a re-send of a previous message that received no replies; my apologies if you've already seen/considered it.)
In a previous patch (http://news.php.net/article.php?group=php.dev&article=84281), support was added to preg_split for capturing offsets along with matches. The attached patch adds similar support to preg_match and preg_match_all via a new PREG_MATCH_OFFSET_CAPTURE flag. The code handles capturing offsets for both subpattern matches and whole pattern matches, using the previously-added add_offset_pair helper function. The flag is a new fourth (and optional) parameter for preg_match, and is OR'ed into the existing 'order' parameter for preg_match_all, above PREG_SET_ORDER and PREG_PATTERN_ORDER. The patch below is diffed against the CVS head - humbly submitted for application, rejection, suggestions, or extensive flaming. :) Thanks in advance, - Dave [EMAIL PROTECTED] --- ext/pcre/php_pcre.c.orig Tue Jun 4 13:02:50 2002 +++ ext/pcre/php_pcre.c Tue Jun 4 13:12:10 2002 @@ -35,7 +35,9 @@ #define PREG_PATTERN_ORDER 0 #define PREG_SET_ORDER 1 -#define PREG_SPLIT_NO_EMPTY (1<<0) +#define PREG_MATCH_OFFSET_CAPTURE (1<<2) + +#define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_SPLIT_DELIM_CAPTURE (1<<1) #define PREG_SPLIT_OFFSET_CAPTURE (1<<2) @@ -99,6 +101,7 @@ REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, +CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -310,6 +313,24 @@ } /* }}} */ +/* {{{ add_offset_pair + */ +static inline void add_offset_pair(zval *result, char *str, int len, int offset) +{ + zval *match_pair; + + ALLOC_ZVAL(match_pair); + array_init(match_pair); + 
INIT_PZVAL(match_pair); + + /* Add (match, offset) to the return value */ + add_next_index_stringl(match_pair, str, len, 1); + add_next_index_long(match_pair, offset); + + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), +NULL); +} +/* }}} */ + /* {{{ php_pcre_match */ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) @@ -335,6 +356,7 @@ int matched; /* Has anything matched */ int i; int subpats_order_val = 0; /* Integer value of subpats_order */ + int offset_capture = 0; /* If offsets should +be captured */ int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Used to hold list of subpatterns */ char *match; /* The current match */ @@ -363,11 +385,17 @@ /* Make sure subpats_order is a number */ convert_to_long_ex(subpats_order); - subpats_order_val = Z_LVAL_PP(subpats_order); - if (subpats_order_val < PREG_PATTERN_ORDER || - subpats_order_val > PREG_SET_ORDER) { - zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()"); - } + offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE); + + if (global) { + subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL); + + if ((subpats_order_val < PREG_PATTERN_ORDER) || + (subpats_order_val > PREG_SET_ORDER)) { + zend_error(E_WARNING, "Wrong value for parameter 4 +in call to preg_match_all()"); + } + } + break; default: @@ -442,8 +470,13 @@ if (subpats_order_val == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. 
*/ for (i = 0; i < count; i++) { - add_next_index_stringl(match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + if (offset_capture) { + +add_offset_pair(match_sets[i], (char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]); + } else { + +add_next_index_stringl(match_sets[i], (char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], 1); + } } /* * If the number of captured subpatterns on this run is @@ -463,8 +496,13 @@ /* Add all the subpatterns to it */ for (i = 0; i < count; i++) { - add_next_index_stringl(result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + if (offset_capture) { + +add_offset_pair(result_set, (char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]); + } else { + +add_next_index_stringl(result_set, (char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], 1); + } } /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_PP(subpats), &result_set, @@ -474,8 +512,13 @@ else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. 
*/ for (i = 0; i < count; i++) { - add_next_index_stringl((*subpats), (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + if (offset_capture) { + add_offset_pair((*subpats), +(char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]); + } else { + +add_next_index_stringl((*subpats), (char *)stringlist[i], + + offsets[(i<<1)+1] - offsets[i<<1], 1); + } } } @@ -518,7 +561,7 @@ } /* }}} */ -/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns]) +/* {{{ proto int preg_match(string pattern, string subject [, array matches [, int +flags]]) Perform a Perl-style regular expression match */ PHP_FUNCTION(preg_match) { @@ -1063,21 +1106,6 @@ preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); } /* }}} */ - -static inline void add_offset_pair(zval *result, char *str, int len, int offset) -{ - zval *match_pair; - - ALLOC_ZVAL(match_pair); - array_init(match_pair); - INIT_PZVAL(match_pair); - - /* Add (match, offset) to the return value */ - add_next_index_stringl(match_pair, str, len, 1); - add_next_index_long(match_pair, offset); - - zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); -} /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) Split string into an array using a perl-style regular expression as a delimiter */ -- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php