andrei Tue Sep 19 20:41:56 2006 UTC Modified files: /php-src/ext/pcre php_pcre.c Log: Unicode support in preg_grep(). http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.202&r2=1.203&diff_format=u Index: php-src/ext/pcre/php_pcre.c diff -u php-src/ext/pcre/php_pcre.c:1.202 php-src/ext/pcre/php_pcre.c:1.203 --- php-src/ext/pcre/php_pcre.c:1.202 Tue Sep 19 20:01:10 2006 +++ php-src/ext/pcre/php_pcre.c Tue Sep 19 20:41:56 2006 @@ -16,14 +16,9 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_pcre.c,v 1.202 2006/09/19 20:01:10 andrei Exp $ */ +/* $Id: php_pcre.c,v 1.203 2006/09/19 20:41:56 andrei Exp $ */ -/* UTODO - * - PCRE_NO_UTF8_CHECK option for Unicode strings - * - * php_pcre_split_impl(): - * - Avoid the /./ bump for Unicode strings with U8_FWD_1() - * +/* TODO * php_pcre_replace_impl(): * - should use fcall info cache (enhancement) */ @@ -840,7 +835,7 @@ } /* }}} */ -/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]]) +/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]]) U Perform a Perl-style regular expression match */ PHP_FUNCTION(preg_match) { @@ -848,7 +843,7 @@ } /* }}} */ -/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]]) +/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]]) U Perform a Perl-style global regular expression match */ PHP_FUNCTION(preg_match_all) { @@ -1448,7 +1443,7 @@ } /* }}} */ -/* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]]) +/* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]]) U Perform Perl-style regular expression replacement. 
*/ PHP_FUNCTION(preg_replace) { @@ -1456,7 +1451,7 @@ } /* }}} */ -/* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]]) +/* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]]) U Perform Perl-style regular expression replacement using replacement callback. */ PHP_FUNCTION(preg_replace_callback) { @@ -1464,7 +1459,7 @@ } /* }}} */ -/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) +/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) U Split string into an array using a perl-style regular expression as a delimiter */ PHP_FUNCTION(preg_split) { @@ -1672,7 +1667,7 @@ } /* }}} */ -/* {{{ proto string preg_quote(string str [, string delim_char]) +/* {{{ proto string preg_quote(string str [, string delim_char]) U Quote regular expression characters plus an optional character */ PHP_FUNCTION(preg_quote) { @@ -1777,7 +1772,7 @@ } /* }}} */ -/* {{{ proto array preg_grep(string regex, array input [, int flags]) +/* {{{ proto array preg_grep(string regex, array input [, int flags]) U Searches array and returns entries which match regex */ PHP_FUNCTION(preg_grep) { @@ -1788,8 +1783,8 @@ pcre_cache_entry *pce; /* Compiled regular expression */ /* Get arguments and do error checking */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len, - &input, &flags) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&a|l", &regex, + &regex_len, UG(utf8_conv), &input, &flags) == FAILURE) { return; } @@ -1811,10 +1806,13 @@ int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ zstr string_key; + int string_key_len; ulong num_key; zend_bool invert; /* Whether to return non-matching entries */ int rc; + int exoptions = 0; /* Execution options */ + invert = flags & PREG_GREP_INVERT ? 
1 : 0; @@ -1839,16 +1837,24 @@ PCRE_G(error_code) = PHP_PCRE_NO_ERROR; + if (UG(unicode)) { + exoptions |= PCRE_NO_UTF8_CHECK; + } + /* Go through the input array */ zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { + zval subject; - convert_to_string_ex(entry); + subject = **entry; + if (Z_TYPE_PP(entry) != IS_STRING) { + zval_copy_ctor(&subject); + convert_to_string_with_converter(&subject, UG(utf8_conv)); + } /* Perform the match */ - count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry), - Z_STRLEN_PP(entry), 0, - 0, offsets, size_offsets); + count = pcre_exec(pce->re, extra, Z_STRVAL(subject), Z_STRLEN(subject), + 0, exoptions, offsets, size_offsets); /* Check for too many substrings condition. */ if (count == 0) { @@ -1862,27 +1868,28 @@ /* If the entry fits our requirements */ if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { - (*entry)->refcount++; /* Add to return array */ - switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) + switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL)) { case HASH_KEY_IS_UNICODE: - add_u_assoc_zval(return_value, IS_UNICODE, string_key, *entry); + add_u_assoc_zval_ex(return_value, IS_UNICODE, string_key, string_key_len, *entry); break; case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key.s, - strlen(string_key.s)+1, entry, sizeof(zval *), NULL); + add_u_assoc_zval_ex(return_value, IS_STRING, string_key, string_key_len, *entry); break; case HASH_KEY_IS_LONG: - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, - sizeof(zval *), NULL); + add_index_zval(return_value, num_key, *entry); break; } } - + + if (Z_TYPE_PP(entry) != IS_STRING) { + zval_dtor(&subject); + } + zend_hash_move_forward(Z_ARRVAL_P(input)); }
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php