Hi Andrei, all:

In a previous patch 
(http://news.php.net/article.php?group=php.dev&article=84281), support
was added to preg_split for capturing offsets along with matches. The
attached patch adds similar support to preg_match and preg_match_all via
a new PREG_MATCH_OFFSET_CAPTURE flag.

The code handles capturing offsets for both subpattern matches and whole
pattern matches, using the previously-added add_offset_pair helper function.

The flag is passed as a new, optional fourth parameter to preg_match, and
is OR'd into the existing 'order' parameter of preg_match_all, using a bit
above those taken by PREG_PATTERN_ORDER and PREG_SET_ORDER.

The patch below is diffed against the CVS head - humbly submitted for
application, rejection, suggestions, or extensive flaming. :)


Thanks in advance,

- Dave
  [EMAIL PROTECTED]
--- ext/pcre/php_pcre.c.orig    Tue Jun  4 13:02:50 2002
+++ ext/pcre/php_pcre.c Tue Jun  4 13:12:10 2002
@@ -35,7 +35,9 @@
 #define PREG_PATTERN_ORDER                     0
 #define PREG_SET_ORDER                         1
 
-#define        PREG_SPLIT_NO_EMPTY                     (1<<0)
+#define PREG_MATCH_OFFSET_CAPTURE      (1<<2)
+
+#define PREG_SPLIT_NO_EMPTY                    (1<<0)
 #define PREG_SPLIT_DELIM_CAPTURE       (1<<1)
 #define PREG_SPLIT_OFFSET_CAPTURE      (1<<2)
 
@@ -99,6 +101,7 @@
        
        REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | 
CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | 
CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, 
+CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | 
CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
@@ -310,6 +313,24 @@
 }
 /* }}} */
 
+/* {{{ add_offset_pair
+ */
+static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+{
+       zval *match_pair;
+
+       ALLOC_ZVAL(match_pair);
+       array_init(match_pair);
+       INIT_PZVAL(match_pair);
+
+       /* Add (match, offset) to the return value */
+       add_next_index_stringl(match_pair, str, len, 1);
+       add_next_index_long(match_pair, offset);
+       
+       zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), 
+NULL);
+}
+/* }}} */
+
 /* {{{ php_pcre_match
  */
 static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
@@ -335,6 +356,7 @@
        int                              matched;                       /* Has 
anything matched */
        int                              i;
        int                              subpats_order_val = 0; /* Integer value of 
subpats_order */
+       int                              offset_capture = 0;    /* If offsets should 
+be captured */
        int                              g_notempty = 0;        /* If the match should 
not be empty */
        const char         **stringlist;                /* Used to hold list of 
subpatterns */
        char                    *match;                         /* The current match */
@@ -363,11 +385,17 @@
        
                        /* Make sure subpats_order is a number */
                        convert_to_long_ex(subpats_order);
-                       subpats_order_val = Z_LVAL_PP(subpats_order);
-                       if (subpats_order_val < PREG_PATTERN_ORDER ||
-                               subpats_order_val > PREG_SET_ORDER) {
-                               zend_error(E_WARNING, "Wrong value for parameter 4 in 
call to preg_match_all()");
-                       }
+            offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE);
+            
+                       if (global) {
+              subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL);
+                       
+              if ((subpats_order_val < PREG_PATTERN_ORDER) ||
+                  (subpats_order_val > PREG_SET_ORDER)) {
+                                 zend_error(E_WARNING, "Wrong value for parameter 4 
+in call to preg_match_all()");
+                         }
+            }
+
                        break;
                        
                default:
@@ -442,8 +470,13 @@
                                        if (subpats_order_val == PREG_PATTERN_ORDER) {
                                                /* For each subpattern, insert it into 
the appropriate array. */
                                                for (i = 0; i < count; i++) {
-                                                       
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
-                                                                                      
            offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                                       if (offset_capture) {
+                                                               
+add_offset_pair(match_sets[i], (char *)stringlist[i],
+                                                                                      
+         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+                                                       } else {
+                                                               
+add_next_index_stringl(match_sets[i], (char *)stringlist[i],
+                                                                                      
+                         offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                                       }
                                                }
                                                /*
                                                 * If the number of captured 
subpatterns on this run is
@@ -463,8 +496,13 @@
                                                
                                                /* Add all the subpatterns to it */
                                                for (i = 0; i < count; i++) {
-                                                       
add_next_index_stringl(result_set, (char *)stringlist[i],
-                                                                                      
            offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                                       if (offset_capture) {
+                                                               
+add_offset_pair(result_set, (char *)stringlist[i],
+                                                                                      
+                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+                                                       } else {
+                                                               
+add_next_index_stringl(result_set, (char *)stringlist[i],
+                                                                                      
+                         offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                                       }
                                                }
                                                /* And add it to the output array */
                                                
zend_hash_next_index_insert(Z_ARRVAL_PP(subpats), &result_set,
@@ -474,8 +512,13 @@
                                else {                  /* single pattern matching */
                                        /* For each subpattern, insert it into the 
subpatterns array. */
                                        for (i = 0; i < count; i++) {
-                                               add_next_index_stringl((*subpats), 
(char *)stringlist[i],
-                                                                                      
    offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                               if (offset_capture) {
+                                                       add_offset_pair((*subpats), 
+(char *)stringlist[i],
+                                                                                      
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+                                               } else {
+                                                       
+add_next_index_stringl((*subpats), (char *)stringlist[i],
+                                                                                      
+                 offsets[(i<<1)+1] - offsets[i<<1], 1);
+                                               }
                                        }
                                }
 
@@ -518,7 +561,7 @@
 }
 /* }}} */
 
-/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns])
+/* {{{ proto int preg_match(string pattern, string subject [, array matches [, int 
+flags]])
    Perform a Perl-style regular expression match */
 PHP_FUNCTION(preg_match)
 {
@@ -1063,21 +1106,6 @@
        preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 }
 /* }}} */
-
-static inline void add_offset_pair(zval *result, char *str, int len, int offset)
-{
-       zval *match_pair;
-
-       ALLOC_ZVAL(match_pair);
-       array_init(match_pair);
-       INIT_PZVAL(match_pair);
-
-       /* Add (match, offset) to the return value */
-       add_next_index_stringl(match_pair, str, len, 1);
-       add_next_index_long(match_pair, offset);
-       
-       zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), 
NULL);
-}
 
 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int 
flags]]) 
    Split string into an array using a perl-style regular expression as a delimiter */

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to