andrei          Tue Sep 19 20:41:56 2006 UTC

  Modified files:              
    /php-src/ext/pcre   php_pcre.c 
  Log:
  Unicode support in preg_grep().
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.202&r2=1.203&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.202 php-src/ext/pcre/php_pcre.c:1.203
--- php-src/ext/pcre/php_pcre.c:1.202   Tue Sep 19 20:01:10 2006
+++ php-src/ext/pcre/php_pcre.c Tue Sep 19 20:41:56 2006
@@ -16,14 +16,9 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_pcre.c,v 1.202 2006/09/19 20:01:10 andrei Exp $ */
+/* $Id: php_pcre.c,v 1.203 2006/09/19 20:41:56 andrei Exp $ */
 
-/* UTODO
- *  - PCRE_NO_UTF8_CHECK option for Unicode strings
- *
- *  php_pcre_split_impl():
- *   - Avoid the /./ bump for Unicode strings with U8_FWD_1()
- *
+/*  TODO
  *  php_pcre_replace_impl():
  *   - should use fcall info cache (enhancement)
  */
@@ -840,7 +835,7 @@
 }
 /* }}} */
 
-/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]])
+/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]]) U
    Perform a Perl-style regular expression match */
 PHP_FUNCTION(preg_match)
 {
@@ -848,7 +843,7 @@
 }
 /* }}} */
 
-/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]])
+/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]]) U
    Perform a Perl-style global regular expression match */
 PHP_FUNCTION(preg_match_all)
 {
@@ -1448,7 +1443,7 @@
 }
 /* }}} */
 
-/* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]])
+/* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]]) U
    Perform Perl-style regular expression replacement. */
 PHP_FUNCTION(preg_replace)
 {
@@ -1456,7 +1451,7 @@
 }
 /* }}} */
 
-/* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]])
+/* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]]) U
    Perform Perl-style regular expression replacement using replacement callback. */
 PHP_FUNCTION(preg_replace_callback)
 {
@@ -1464,7 +1459,7 @@
 }
 /* }}} */
 
-/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) 
+/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) U
    Split string into an array using a perl-style regular expression as a delimiter */
 PHP_FUNCTION(preg_split)
 {
@@ -1672,7 +1667,7 @@
 }
 /* }}} */
 
-/* {{{ proto string preg_quote(string str [, string delim_char])
+/* {{{ proto string preg_quote(string str [, string delim_char]) U
    Quote regular expression characters plus an optional character */
 PHP_FUNCTION(preg_quote)
 {
@@ -1777,7 +1772,7 @@
 }
 /* }}} */
 
-/* {{{ proto array preg_grep(string regex, array input [, int flags])
+/* {{{ proto array preg_grep(string regex, array input [, int flags]) U
    Searches array and returns entries which match regex */
 PHP_FUNCTION(preg_grep)
 {
@@ -1788,8 +1783,8 @@
        pcre_cache_entry        *pce;                   /* Compiled regular expression */
 
        /* Get arguments and do error checking */
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len,
-                                                         &input, &flags) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&a|l", &regex,
+                                                         &regex_len, UG(utf8_conv), &input, &flags) == FAILURE) {
                return;
        }
        
@@ -1811,10 +1806,13 @@
        int                              size_offsets;          /* Size of the offsets array */
        int                              count = 0;                     /* Count of matched subpatterns */
        zstr                     string_key;
+       int                              string_key_len;
        ulong                    num_key;
        zend_bool                invert;                        /* Whether to return non-matching
                                                                                   entries */
        int                              rc;
+       int                              exoptions = 0;         /* Execution options */
+       
        
        invert = flags & PREG_GREP_INVERT ? 1 : 0;
        
@@ -1839,16 +1837,24 @@
 
        PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
 
+       if (UG(unicode)) {
+               exoptions |= PCRE_NO_UTF8_CHECK;
+       }
+
        /* Go through the input array */
        zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
        while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
+               zval subject;
 
-               convert_to_string_ex(entry);
+               subject = **entry;
+               if (Z_TYPE_PP(entry) != IS_STRING) {
+                       zval_copy_ctor(&subject);
+                       convert_to_string_with_converter(&subject, UG(utf8_conv));
+               }
 
                /* Perform the match */
-               count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry),
-                                                 Z_STRLEN_PP(entry), 0,
-                                                 0, offsets, size_offsets);
+               count = pcre_exec(pce->re, extra, Z_STRVAL(subject), Z_STRLEN(subject),
+                                                 0, exoptions, offsets, size_offsets);
 
                /* Check for too many substrings condition. */
                if (count == 0) {
@@ -1862,27 +1868,28 @@
                /* If the entry fits our requirements */
                if ((count > 0 && !invert) ||
                        (count == PCRE_ERROR_NOMATCH && invert)) {
-                       (*entry)->refcount++;
 
                        /* Add to return array */
-                       switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
+                       switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
                        {
                                case HASH_KEY_IS_UNICODE:
-                                       add_u_assoc_zval(return_value, IS_UNICODE, string_key, *entry);
+                                       add_u_assoc_zval_ex(return_value, IS_UNICODE, string_key, string_key_len, *entry);
                                        break;
 
                                case HASH_KEY_IS_STRING:
-                                       zend_hash_update(Z_ARRVAL_P(return_value), string_key.s,
-                                                                        strlen(string_key.s)+1, entry, sizeof(zval *), NULL);
+                                       add_u_assoc_zval_ex(return_value, IS_STRING, string_key, string_key_len, *entry);
                                        break;
 
                                case HASH_KEY_IS_LONG:
-                                       zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
-                                                                                  sizeof(zval *), NULL);
+                                       add_index_zval(return_value, num_key, *entry);
                                        break;
                        }
                }
-               
+
+               if (Z_TYPE_PP(entry) != IS_STRING) {
+                       zval_dtor(&subject);
+               }
+
                zend_hash_move_forward(Z_ARRVAL_P(input));
        }
        

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to