andrei		Tue Oct  3 18:13:36 2006 UTC

  Modified files:
    /php-src/ext/standard	string.c
    /php-src	unicode-progress.txt
  Log:
  Unicode support in strripos().

http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.599&r2=1.600&diff_format=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.599 php-src/ext/standard/string.c:1.600 --- php-src/ext/standard/string.c:1.599 Tue Oct 3 17:45:16 2006 +++ php-src/ext/standard/string.c Tue Oct 3 18:13:36 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: string.c,v 1.599 2006/10/03 17:45:16 iliaa Exp $ */ +/* $Id: string.c,v 1.600 2006/10/03 18:13:36 andrei Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2156,7 +2156,7 @@ /* {{{ php_u_stristr Unicode version of case insensitve strstr */ -PHPAPI UChar *php_u_stristr(UChar *str, UChar *pat, int str_len, int pat_len TSRMLS_DC) +PHPAPI UChar *php_u_stristr(UChar *str, UChar *pat, int str_len, int pat_len, zend_bool find_first TSRMLS_DC) { UChar *str_fold, *pat_fold; int str_fold_len, pat_fold_len; @@ -2167,7 +2167,11 @@ zend_case_fold_string(&str_fold, &str_fold_len, str, str_len, U_FOLD_CASE_DEFAULT, &status); if (str_fold_len == str_len) { zend_case_fold_string(&pat_fold, &pat_fold_len, pat, pat_len, U_FOLD_CASE_DEFAULT, &status); - found = u_strFindFirst(str_fold, str_fold_len, pat_fold, pat_fold_len); + if (find_first) { + found = u_strFindFirst(str_fold, str_fold_len, pat_fold, pat_fold_len); + } else { + found = u_strFindLast(str_fold, str_fold_len, pat_fold, pat_fold_len); + } if (found) { result = str + (found - str_fold); } else { @@ -2179,7 +2183,11 @@ usearch_setPattern(UG(root_search), pat, pat_len, &status); usearch_setOffset(UG(root_search), 0, &status); - offset = usearch_first(UG(root_search), &status); + if (find_first) { + offset = usearch_first(UG(root_search), &status); + } else { + offset = usearch_last(UG(root_search), &status); + } if (offset != USEARCH_DONE) { result = str + offset; } else { @@ -2388,7 +2396,7 @@ if (Z_TYPE_PP(haystack) == IS_UNICODE) { found = 
php_u_stristr(Z_USTRVAL_PP(haystack), target.u, - Z_USTRLEN_PP(haystack), needle_len TSRMLS_CC); + Z_USTRLEN_PP(haystack), needle_len, 1 TSRMLS_CC); } else { haystack_copy = estrndup(Z_STRVAL_PP(haystack), Z_STRLEN_PP(haystack)); found = php_stristr(Z_STRVAL_PP(haystack), target.s, @@ -2668,7 +2676,7 @@ if (Z_TYPE_PP(haystack) == IS_UNICODE) { /* calculate code unit offset */ U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset); - found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC); + found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len, 1 TSRMLS_CC); } else { haystack_dup = estrndup(Z_STRVAL_PP(haystack), haystack_len); php_strtolower((char *)haystack_dup, haystack_len); @@ -2690,7 +2698,7 @@ /* calculate code unit offset */ U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset); found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, - u_needle_char, haystack_len, needle_len TSRMLS_CC); + u_needle_char, haystack_len, needle_len, 1 TSRMLS_CC); } else { c = tolower((char)Z_LVAL_PP(needle)); needle_char[0] = c; @@ -2758,6 +2766,7 @@ needle = Z_UNIVAL_PP(zneedle); needle_len = Z_UNILEN_PP(zneedle); } else { + convert_to_long_ex(zneedle); if (Z_TYPE_PP(zhaystack) == IS_UNICODE) { if (Z_LVAL_PP(zneedle) < 0 || Z_LVAL_PP(zneedle) > 0x10FFFF) { php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)"); @@ -2767,7 +2776,6 @@ u_ord_needle[needle_len] = 0; needle.u = u_ord_needle; } else { - convert_to_long_ex(zneedle); ord_needle[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF); ord_needle[1] = '\0'; needle.s = ord_needle; @@ -2860,92 +2868,158 @@ } /* }}} */ -/* {{{ proto int strripos(string haystack, string needle [, int offset]) +/* {{{ proto int strripos(string haystack, string needle [, int offset]) U Finds position of last occurrence of a string within another string */ PHP_FUNCTION(strripos) { + zstr 
haystack, needle; zval **zneedle; - char *needle, *haystack; int needle_len, haystack_len; + zend_uchar haystack_type; long offset = 0; - char *p, *e, ord_needle[2]; + char *p, *e, needle_char[2]; + UChar *u_p, *u_e, *pos; + UChar u_needle_char[3]; char *needle_dup, *haystack_dup; + int cu_offset = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ|l", &haystack, &haystack_len, &zneedle, &offset) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|l", &haystack, + &haystack_len, &haystack_type, &zneedle, &offset) == FAILURE) { RETURN_FALSE; } - if (Z_TYPE_PP(zneedle) == IS_STRING) { - needle = Z_STRVAL_PP(zneedle); - needle_len = Z_STRLEN_PP(zneedle); + if (Z_TYPE_PP(zneedle) == IS_STRING || Z_TYPE_PP(zneedle) == IS_UNICODE) { + convert_to_explicit_type_ex(zneedle, haystack_type); + needle = Z_UNIVAL_PP(zneedle); + needle_len = Z_UNILEN_PP(zneedle); } else { convert_to_long_ex(zneedle); - ord_needle[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF); - ord_needle[1] = '\0'; - needle = ord_needle; - needle_len = 1; + if (haystack_type == IS_UNICODE) { + if (Z_LVAL_PP(zneedle) < 0 || Z_LVAL_PP(zneedle) > 0x10FFFF) { + php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)"); + RETURN_FALSE; + } + needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(zneedle), u_needle_char); + u_needle_char[needle_len] = 0; + needle.u = u_needle_char; + } else { + needle_char[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF); + needle_char[1] = '\0'; + needle.s = needle_char; + needle_len = 1; + } } - if ((haystack_len == 0) || (needle_len == 0)) { + if ((haystack_len == 0) || (needle_len == 0) || needle_len > haystack_len) { RETURN_FALSE; } - if (needle_len == 1) { - /* Single character search can shortcut memcmps - Can also avoid tolower emallocs */ + if (haystack_type == IS_UNICODE) { if (offset >= 0) { - p = haystack + offset; - e = haystack + haystack_len - 1; + U16_FWD_N(haystack.u, cu_offset, haystack_len, offset); + if (cu_offset > 
haystack_len - needle_len) { + RETURN_FALSE; + } + u_p = haystack.u + cu_offset; + u_e = haystack.u + haystack_len - needle_len; } else { - p = haystack; + u_p = haystack.u; if (-offset > haystack_len) { - e = haystack + haystack_len - 1; + RETURN_FALSE; } else { - e = haystack + haystack_len + offset; + cu_offset = haystack_len; + U16_BACK_N(haystack.u, 0, cu_offset, -offset); + if (cu_offset == 0) { + RETURN_FALSE; + } + if (needle_len > haystack_len - cu_offset) { + u_e = haystack.u + haystack_len - needle_len; + } else { + u_e = haystack.u + cu_offset; + } } } - /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */ - *ord_needle = tolower(*needle); - while (e >= p) { - if (tolower(*e) == *ord_needle) { - RETURN_LONG(e - p + (offset > 0 ? offset : 0)); + + pos = php_u_stristr(u_p, needle.u, u_e-u_p+needle_len, needle_len, 0 TSRMLS_CC); + if (pos) { + if (offset > 0) { + RETURN_LONG(offset + u_countChar32(u_p, (UChar*)pos - u_p)); + } else { + RETURN_LONG(u_countChar32(haystack.u, (UChar*)pos - haystack.u)); } - e--; + } else { + RETURN_FALSE; } - RETURN_FALSE; - } + } else { + if (needle_len == 1) { + /* Single character search can shortcut memcmps + Can also avoid tolower emallocs */ + if (offset >= 0) { + if (offset > haystack_len) { + RETURN_FALSE; + } + p = haystack.s + offset; + e = haystack.s + haystack_len - 1; + } else { + p = haystack.s; + if (-offset > haystack_len) { + RETURN_FALSE; + } else { + e = haystack.s + haystack_len + offset; + } + } + /* Borrow that needle_char buffer to avoid repeatedly tolower()ing needle */ + *needle_char = tolower(*needle.s); + while (e >= p) { + if (tolower(*e) == *needle_char) { + RETURN_LONG(e - p + (offset > 0 ? 
offset : 0)); + } + e--; + } + RETURN_FALSE; + } + + needle_dup = estrndup(needle.s, needle_len); + php_strtolower(needle_dup, needle_len); + haystack_dup = estrndup(haystack.s, haystack_len); + php_strtolower(haystack_dup, haystack_len); - needle_dup = estrndup(needle, needle_len); - php_strtolower(needle_dup, needle_len); - haystack_dup = estrndup(haystack, haystack_len); - php_strtolower(haystack_dup, haystack_len); - - if (offset >= 0) { - p = haystack_dup + offset; - e = haystack_dup + haystack_len - needle_len; - } else { - p = haystack_dup; - if (-offset > haystack_len) { - e = haystack_dup - needle_len; - } else if (needle_len > -offset) { + if (offset >= 0) { + if (offset > haystack_len) { + efree(haystack_dup); + efree(needle_dup); + RETURN_FALSE; + } + p = haystack_dup + offset; e = haystack_dup + haystack_len - needle_len; } else { - e = haystack_dup + haystack_len + offset; + if (-offset > haystack_len) { + efree(haystack_dup); + efree(needle_dup); + RETURN_FALSE; + } + p = haystack_dup; + if (needle_len > -offset) { + e = haystack_dup + haystack_len - needle_len; + } else { + e = haystack_dup + haystack_len + offset; + } } - } - while (e >= p) { - if (memcmp(e, needle_dup, needle_len) == 0) { - efree(haystack_dup); - efree(needle_dup); - RETURN_LONG(e - p + (offset > 0 ? offset : 0)); + while (e >= p) { + if (memcmp(e, needle_dup, needle_len) == 0) { + efree(haystack_dup); + efree(needle_dup); + RETURN_LONG(e - p + (offset > 0 ? 
offset : 0)); + } + e--; } - e--; - } - efree(haystack_dup); - efree(needle_dup); - RETURN_FALSE; + efree(haystack_dup); + efree(needle_dup); + + RETURN_FALSE; + } } /* }}} */ http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.52&r2=1.53&diff_format=u Index: php-src/unicode-progress.txt diff -u php-src/unicode-progress.txt:1.52 php-src/unicode-progress.txt:1.53 --- php-src/unicode-progress.txt:1.52 Mon Oct 2 19:18:14 2006 +++ php-src/unicode-progress.txt Tue Oct 3 18:13:36 2006 @@ -26,7 +26,6 @@ sscanf() Params API. Rest - no idea yet. - strripos() str_replace() stri_replace() substr_compare() @@ -161,6 +160,7 @@ strpbrk() strpos() strrchr() + strripos() strrev() strrpos() strspn()

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php