andrei Tue Oct 3 18:13:36 2006 UTC
Modified files:
/php-src/ext/standard string.c
/php-src unicode-progress.txt
Log:
Unicode support in strripos().
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.599&r2=1.600&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.599 php-src/ext/standard/string.c:1.600
--- php-src/ext/standard/string.c:1.599 Tue Oct 3 17:45:16 2006
+++ php-src/ext/standard/string.c Tue Oct 3 18:13:36 2006
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.599 2006/10/03 17:45:16 iliaa Exp $ */
+/* $Id: string.c,v 1.600 2006/10/03 18:13:36 andrei Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -2156,7 +2156,7 @@
/* {{{ php_u_stristr
Unicode version of case insensitve strstr */
-PHPAPI UChar *php_u_stristr(UChar *str, UChar *pat, int str_len, int pat_len TSRMLS_DC)
+PHPAPI UChar *php_u_stristr(UChar *str, UChar *pat, int str_len, int pat_len, zend_bool find_first TSRMLS_DC)
{
UChar *str_fold, *pat_fold;
int str_fold_len, pat_fold_len;
@@ -2167,7 +2167,11 @@
zend_case_fold_string(&str_fold, &str_fold_len, str, str_len, U_FOLD_CASE_DEFAULT, &status);
if (str_fold_len == str_len) {
zend_case_fold_string(&pat_fold, &pat_fold_len, pat, pat_len, U_FOLD_CASE_DEFAULT, &status);
- found = u_strFindFirst(str_fold, str_fold_len, pat_fold, pat_fold_len);
+ if (find_first) {
+ found = u_strFindFirst(str_fold, str_fold_len, pat_fold, pat_fold_len);
+ } else {
+ found = u_strFindLast(str_fold, str_fold_len, pat_fold, pat_fold_len);
+ }
if (found) {
result = str + (found - str_fold);
} else {
@@ -2179,7 +2183,11 @@
usearch_setPattern(UG(root_search), pat, pat_len, &status);
usearch_setOffset(UG(root_search), 0, &status);
- offset = usearch_first(UG(root_search), &status);
+ if (find_first) {
+ offset = usearch_first(UG(root_search), &status);
+ } else {
+ offset = usearch_last(UG(root_search), &status);
+ }
if (offset != USEARCH_DONE) {
result = str + offset;
} else {
@@ -2388,7 +2396,7 @@
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
found = php_u_stristr(Z_USTRVAL_PP(haystack), target.u,
- Z_USTRLEN_PP(haystack), needle_len TSRMLS_CC);
+ Z_USTRLEN_PP(haystack), needle_len, 1 TSRMLS_CC);
} else {
haystack_copy = estrndup(Z_STRVAL_PP(haystack), Z_STRLEN_PP(haystack));
found = php_stristr(Z_STRVAL_PP(haystack), target.s,
@@ -2668,7 +2676,7 @@
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
/* calculate code unit offset */
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
- found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC);
+ found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len, 1 TSRMLS_CC);
} else {
haystack_dup = estrndup(Z_STRVAL_PP(haystack), haystack_len);
php_strtolower((char *)haystack_dup, haystack_len);
@@ -2690,7 +2698,7 @@
/* calculate code unit offset */
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset,
- u_needle_char, haystack_len, needle_len TSRMLS_CC);
+ u_needle_char, haystack_len, needle_len, 1 TSRMLS_CC);
} else {
c = tolower((char)Z_LVAL_PP(needle));
needle_char[0] = c;
@@ -2758,6 +2766,7 @@
needle = Z_UNIVAL_PP(zneedle);
needle_len = Z_UNILEN_PP(zneedle);
} else {
+ convert_to_long_ex(zneedle);
if (Z_TYPE_PP(zhaystack) == IS_UNICODE) {
if (Z_LVAL_PP(zneedle) < 0 || Z_LVAL_PP(zneedle) > 0x10FFFF) {
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
@@ -2767,7 +2776,6 @@
u_ord_needle[needle_len] = 0;
needle.u = u_ord_needle;
} else {
- convert_to_long_ex(zneedle);
ord_needle[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF);
ord_needle[1] = '\0';
needle.s = ord_needle;
@@ -2860,92 +2868,158 @@
}
/* }}} */
-/* {{{ proto int strripos(string haystack, string needle [, int offset])
+/* {{{ proto int strripos(string haystack, string needle [, int offset]) U
Finds position of last occurrence of a string within another string */
PHP_FUNCTION(strripos)
{
+ zstr haystack, needle;
zval **zneedle;
- char *needle, *haystack;
int needle_len, haystack_len;
+ zend_uchar haystack_type;
long offset = 0;
- char *p, *e, ord_needle[2];
+ char *p, *e, needle_char[2];
+ UChar *u_p, *u_e, *pos;
+ UChar u_needle_char[3];
char *needle_dup, *haystack_dup;
+ int cu_offset = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ|l", &haystack, &haystack_len, &zneedle, &offset) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|l", &haystack,
+ &haystack_len, &haystack_type, &zneedle, &offset) == FAILURE) {
RETURN_FALSE;
}
- if (Z_TYPE_PP(zneedle) == IS_STRING) {
- needle = Z_STRVAL_PP(zneedle);
- needle_len = Z_STRLEN_PP(zneedle);
+ if (Z_TYPE_PP(zneedle) == IS_STRING || Z_TYPE_PP(zneedle) == IS_UNICODE) {
+ convert_to_explicit_type_ex(zneedle, haystack_type);
+ needle = Z_UNIVAL_PP(zneedle);
+ needle_len = Z_UNILEN_PP(zneedle);
} else {
convert_to_long_ex(zneedle);
- ord_needle[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF);
- ord_needle[1] = '\0';
- needle = ord_needle;
- needle_len = 1;
+ if (haystack_type == IS_UNICODE) {
+ if (Z_LVAL_PP(zneedle) < 0 || Z_LVAL_PP(zneedle) > 0x10FFFF) {
+ php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(zneedle), u_needle_char);
+ u_needle_char[needle_len] = 0;
+ needle.u = u_needle_char;
+ } else {
+ needle_char[0] = (char)(Z_LVAL_PP(zneedle) & 0xFF);
+ needle_char[1] = '\0';
+ needle.s = needle_char;
+ needle_len = 1;
+ }
}
- if ((haystack_len == 0) || (needle_len == 0)) {
+ if ((haystack_len == 0) || (needle_len == 0) || needle_len > haystack_len) {
RETURN_FALSE;
}
- if (needle_len == 1) {
- /* Single character search can shortcut memcmps
- Can also avoid tolower emallocs */
+ if (haystack_type == IS_UNICODE) {
if (offset >= 0) {
- p = haystack + offset;
- e = haystack + haystack_len - 1;
+ U16_FWD_N(haystack.u, cu_offset, haystack_len, offset);
+ if (cu_offset > haystack_len - needle_len) {
+ RETURN_FALSE;
+ }
+ u_p = haystack.u + cu_offset;
+ u_e = haystack.u + haystack_len - needle_len;
} else {
- p = haystack;
+ u_p = haystack.u;
if (-offset > haystack_len) {
- e = haystack + haystack_len - 1;
+ RETURN_FALSE;
} else {
- e = haystack + haystack_len + offset;
+ cu_offset = haystack_len;
+ U16_BACK_N(haystack.u, 0, cu_offset, -offset);
+ if (cu_offset == 0) {
+ RETURN_FALSE;
+ }
+ if (needle_len > haystack_len - cu_offset) {
+ u_e = haystack.u + haystack_len - needle_len;
+ } else {
+ u_e = haystack.u + cu_offset;
+ }
}
}
- /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
- *ord_needle = tolower(*needle);
- while (e >= p) {
- if (tolower(*e) == *ord_needle) {
- RETURN_LONG(e - p + (offset > 0 ? offset : 0));
+
+ pos = php_u_stristr(u_p, needle.u, u_e-u_p+needle_len, needle_len, 0 TSRMLS_CC);
+ if (pos) {
+ if (offset > 0) {
+ RETURN_LONG(offset + u_countChar32(u_p, (UChar*)pos - u_p));
+ } else {
+ RETURN_LONG(u_countChar32(haystack.u, (UChar*)pos - haystack.u));
}
- e--;
+ } else {
+ RETURN_FALSE;
}
- RETURN_FALSE;
- }
+ } else {
+ if (needle_len == 1) {
+ /* Single character search can shortcut memcmps
+ Can also avoid tolower emallocs */
+ if (offset >= 0) {
+ if (offset > haystack_len) {
+ RETURN_FALSE;
+ }
+ p = haystack.s + offset;
+ e = haystack.s + haystack_len - 1;
+ } else {
+ p = haystack.s;
+ if (-offset > haystack_len) {
+ RETURN_FALSE;
+ } else {
+ e = haystack.s + haystack_len + offset;
+ }
+ }
+ /* Borrow that needle_char buffer to avoid repeatedly tolower()ing needle */
+ *needle_char = tolower(*needle.s);
+ while (e >= p) {
+ if (tolower(*e) == *needle_char) {
+ RETURN_LONG(e - p + (offset > 0 ? offset : 0));
+ }
+ e--;
+ }
+ RETURN_FALSE;
+ }
+
+ needle_dup = estrndup(needle.s, needle_len);
+ php_strtolower(needle_dup, needle_len);
+ haystack_dup = estrndup(haystack.s, haystack_len);
+ php_strtolower(haystack_dup, haystack_len);
- needle_dup = estrndup(needle, needle_len);
- php_strtolower(needle_dup, needle_len);
- haystack_dup = estrndup(haystack, haystack_len);
- php_strtolower(haystack_dup, haystack_len);
-
- if (offset >= 0) {
- p = haystack_dup + offset;
- e = haystack_dup + haystack_len - needle_len;
- } else {
- p = haystack_dup;
- if (-offset > haystack_len) {
- e = haystack_dup - needle_len;
- } else if (needle_len > -offset) {
+ if (offset >= 0) {
+ if (offset > haystack_len) {
+ efree(haystack_dup);
+ efree(needle_dup);
+ RETURN_FALSE;
+ }
+ p = haystack_dup + offset;
e = haystack_dup + haystack_len - needle_len;
} else {
- e = haystack_dup + haystack_len + offset;
+ if (-offset > haystack_len) {
+ efree(haystack_dup);
+ efree(needle_dup);
+ RETURN_FALSE;
+ }
+ p = haystack_dup;
+ if (needle_len > -offset) {
+ e = haystack_dup + haystack_len - needle_len;
+ } else {
+ e = haystack_dup + haystack_len + offset;
+ }
}
- }
- while (e >= p) {
- if (memcmp(e, needle_dup, needle_len) == 0) {
- efree(haystack_dup);
- efree(needle_dup);
- RETURN_LONG(e - p + (offset > 0 ? offset : 0));
+ while (e >= p) {
+ if (memcmp(e, needle_dup, needle_len) == 0) {
+ efree(haystack_dup);
+ efree(needle_dup);
+ RETURN_LONG(e - p + (offset > 0 ? offset : 0));
+ }
+ e--;
}
- e--;
- }
- efree(haystack_dup);
- efree(needle_dup);
- RETURN_FALSE;
+ efree(haystack_dup);
+ efree(needle_dup);
+
+ RETURN_FALSE;
+ }
}
/* }}} */
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.52&r2=1.53&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.52 php-src/unicode-progress.txt:1.53
--- php-src/unicode-progress.txt:1.52 Mon Oct 2 19:18:14 2006
+++ php-src/unicode-progress.txt Tue Oct 3 18:13:36 2006
@@ -26,7 +26,6 @@
sscanf()
Params API. Rest - no idea yet.
- strripos()
str_replace()
stri_replace()
substr_compare()
@@ -161,6 +160,7 @@
strpbrk()
strpos()
strrchr()
+ strripos()
strrev()
strrpos()
strspn()
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php