andrei Mon Oct 2 16:52:22 2006 UTC
Modified files:
/php-src unicode-progress.txt
/php-src/ext/standard string.c
Log:
Make stripos() work with Unicode strings.
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.50&r2=1.51&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.50 php-src/unicode-progress.txt:1.51
--- php-src/unicode-progress.txt:1.50 Fri Sep 22 19:35:05 2006
+++ php-src/unicode-progress.txt Mon Oct 2 16:52:21 2006
@@ -27,7 +27,6 @@
Params API. Rest - no idea yet.
stristr()
- stripos()
strripos()
str_replace()
stri_replace()
@@ -158,6 +157,7 @@
strip_tags()
stripcslashes()
stripslashes()
+ stripos()
strpbrk()
strpos()
strrchr()
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.595&r2=1.596&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.595 php-src/ext/standard/string.c:1.596
--- php-src/ext/standard/string.c:1.595 Mon Oct 2 01:11:04 2006
+++ php-src/ext/standard/string.c Mon Oct 2 16:52:22 2006
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.595 2006/10/02 01:11:04 pollita Exp $ */
+/* $Id: string.c,v 1.596 2006/10/02 16:52:22 andrei Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -2625,7 +2625,7 @@
}
/* }}} */
-/* {{{ proto int stripos(string haystack, string needle [, int offset])
+/* {{{ proto int stripos(string haystack, string needle [, int offset]) U
Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(stripos)
{
@@ -2633,12 +2633,12 @@
long offset = 0;
int haystack_len, needle_len = 0;
zend_uchar str_type;
- void *haystack_dup, *needle_dup = NULL;
+ void *haystack_dup = NULL, *needle_dup = NULL;
char needle_char[2];
char c = 0;
- UChar u_needle_char[3];
- UChar32 ch = 0;
+ UChar u_needle_char[8];
void *found = NULL;
+ int cu_offset = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZ|l", &haystack,
&needle, &offset) == FAILURE) {
return;
@@ -2662,6 +2662,7 @@
if (!Z_UNILEN_PP(needle) || Z_UNILEN_PP(needle) > haystack_len)
{
RETURN_FALSE;
}
+ /* convert both strings to the same type */
if (Z_TYPE_PP(haystack) != Z_TYPE_PP(needle)) {
str_type = zend_get_unified_string_type(2 TSRMLS_CC,
Z_TYPE_PP(haystack), Z_TYPE_PP(needle));
convert_to_explicit_type_ex(haystack, str_type);
@@ -2669,11 +2670,9 @@
}
needle_len = Z_UNILEN_PP(needle);
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
- haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack),
&haystack_len, UG(default_locale));
- needle_dup = php_u_strtolower(Z_USTRVAL_PP(needle),
&needle_len, UG(default_locale));
- found = zend_u_memnstr((UChar *)haystack_dup + offset,
- (UChar
*)needle_dup, needle_len,
- (UChar
*)haystack_dup + haystack_len);
+ /* calculate codeunit offset */
+ U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset,
haystack_len, offset);
+ found = php_u_stristr(Z_USTRVAL_PP(haystack) +
cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC);
} else {
haystack_dup = estrndup(Z_STRVAL_PP(haystack),
haystack_len);
php_strtolower((char *)haystack_dup, haystack_len);
@@ -2688,14 +2687,22 @@
case IS_LONG:
case IS_BOOL:
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
- ch =
u_tolower((UChar32)Z_LVAL_PP(needle));
+ if (Z_LVAL_PP(needle) < 0 ||
Z_LVAL_PP(needle) > 0x10FFFF) {
+ php_error(E_WARNING, "Needle
argument codepoint value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ needle_len =
zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
} else {
c = tolower((char)Z_LVAL_PP(needle));
}
break;
case IS_DOUBLE:
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
- ch =
u_tolower((UChar32)Z_DVAL_PP(needle));
+ if ((UChar32)Z_DVAL_PP(needle) < 0 ||
(UChar32)Z_DVAL_PP(needle) > 0x10FFFF) {
+ php_error(E_WARNING, "Needle
argument codepoint value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ needle_len =
zend_codepoint_to_uchar((UChar32)Z_DVAL_PP(needle), u_needle_char);
} else {
c = tolower((char)Z_DVAL_PP(needle));
}
@@ -2707,18 +2714,12 @@
}
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
- if (U_IS_BMP(ch)) {
- u_needle_char[needle_len++] = ch;
- u_needle_char[needle_len] = 0;
- } else {
- u_needle_char[needle_len++] = U16_LEAD(ch);
- u_needle_char[needle_len++] = U16_TRAIL(ch);
- u_needle_char[needle_len] = 0;
- }
- haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack),
&haystack_len, UG(default_locale));
- found = zend_u_memnstr((UChar *)haystack_dup + offset,
- (UChar
*)u_needle_char, needle_len,
- (UChar
*)haystack_dup + haystack_len);
+ /* calculate codeunit offset */
+ U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset,
haystack_len, offset);
+ u_needle_char[needle_len] = 0;
+ found = php_u_stristr(Z_USTRVAL_PP(haystack) +
cu_offset,
+
u_needle_char, haystack_len, needle_len TSRMLS_CC);
+
} else {
needle_char[0] = c;
needle_char[1] = '\0';
@@ -2731,14 +2732,21 @@
}
}
- efree(haystack_dup);
+ if (haystack_dup) {
+ efree(haystack_dup);
+ }
if (needle_dup) {
efree(needle_dup);
}
if (found) {
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
- RETURN_LONG((UChar *)found - (UChar *)haystack_dup);
+ /* Simple subtraction will not suffice, since there may be
+ supplementary codepoints. We count how many codepoints there are
+ between the starting offset and the found location and add them
+ to the starting codepoint offset. */
+ RETURN_LONG(offset +
u_countChar32(Z_USTRVAL_PP(haystack) + cu_offset,
+
(UChar*)found - (Z_USTRVAL_PP(haystack) + cu_offset)));
} else {
RETURN_LONG((char *)found - (char *)haystack_dup);
}
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php