rolland Thu Oct 20 15:25:56 2005 EDT
Modified files:
/php-src/ext/standard string.c php_string.h
Log:
- Unicode implementation of stristr()
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.498&r2=1.499&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.498 php-src/ext/standard/string.c:1.499
--- php-src/ext/standard/string.c:1.498 Wed Oct 19 15:10:13 2005
+++ php-src/ext/standard/string.c Thu Oct 20 15:25:52 2005
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.498 2005/10/19 19:10:13 rolland Exp $ */
+/* $Id: string.c,v 1.499 2005/10/20 19:25:52 rolland Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -1915,6 +1915,42 @@
}
/* }}} */
+/* {{{ php_u_stristr
+ Unicode version of case insensitive strstr */
+PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len)
+{
+ int32_t i,j, last;
+ UChar32 ch1, ch2;
+
+ /* Have to do this by hand since lower-casing can change lengths
+ by changing codepoints, so the same offset in the lower-cased and
+ the original strings might refer to different codepoints
+ */
+ i = 0;
+ while (i <= (s_len-t_len)) {
+ last = i;
+ U16_NEXT(s, i, s_len, ch1);
+ U16_GET(t, 0, 0, t_len, ch2);
+ if (u_tolower(ch1) == u_tolower(ch2)) {
+ j = 0;
+ U16_FWD_1(t, j, t_len);
+ while (j < t_len) {
+ U16_NEXT(s, i, s_len, ch1);
+ U16_NEXT(t, j, t_len, ch2);
+ if (u_tolower(ch1) != u_tolower(ch2)) {
+ U16_BACK_1(s, 0, i);
+ break;
+ }
+ }
+ if (u_tolower(ch1) == u_tolower(ch2)) {
+ return s+last;
+ }
+ }
+ }
+ return NULL;
+}
+/* }}} */
+
/* {{{ php_stristr
case insensitve strstr */
PHPAPI char *php_stristr(unsigned char *s, unsigned char *t, size_t s_len,
size_t t_len)
@@ -2005,61 +2041,103 @@
Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(stristr)
{
- char *haystack;
- long haystack_len;
- zval *needle;
+ zval *haystack, *needle;
zend_bool part = 0;
- char *found = NULL;
- int found_offset;
- char *haystack_orig;
+ zend_uchar str_type;
char needle_char[2];
-
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz|b", &haystack,
&haystack_len, &needle, &part) == FAILURE) {
+ UChar u_needle_char[3];
+ int32_t needle_len;
+ char *haystack_copy;
+ void *target;
+ void *found = NULL;
+ int found_offset;
+ void *start, *end;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz|b", &haystack,
&needle, &part) == FAILURE) {
return;
}
-
+ SEPARATE_ZVAL(&haystack);
SEPARATE_ZVAL(&needle);
+ if (Z_TYPE_P(haystack) != IS_UNICODE && Z_TYPE_P(haystack) != IS_BINARY
&& Z_TYPE_P(haystack) != IS_STRING) {
+ convert_to_text(haystack);
+ }
- haystack_orig = estrndup(haystack, haystack_len);
-
- if (Z_TYPE_P(needle) == IS_STRING) {
- if (!Z_STRLEN_P(needle)) {
+ if (Z_TYPE_P(needle) == IS_UNICODE || Z_TYPE_P(needle) == IS_BINARY ||
Z_TYPE_P(needle) == IS_STRING) {
+ if (!Z_UNILEN_P(needle)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty
delimiter.");
- efree(haystack_orig);
RETURN_FALSE;
}
-
- found = php_stristr(haystack,
- Z_STRVAL_P(needle),
- haystack_len,
- Z_STRLEN_P(needle));
+ if (Z_TYPE_P(haystack) != Z_TYPE_P(needle)) {
+ str_type = zend_get_unified_string_type(2 TSRMLS_CC,
Z_TYPE_P(haystack), Z_TYPE_P(needle));
+ if (str_type == (zend_uchar)-1) {
+ convert_to_explicit_type(haystack, IS_BINARY);
+ convert_to_explicit_type(needle, IS_BINARY);
+ } else {
+ convert_to_explicit_type(haystack, str_type);
+ convert_to_explicit_type(needle, str_type);
+ }
+ }
+ target = Z_UNIVAL_P(needle);
+ needle_len = Z_UNILEN_P(needle);
} else {
convert_to_long_ex(&needle);
- needle_char[0] = (char) Z_LVAL_P(needle);
- needle_char[1] = 0;
+ needle_len = 0;
+ if (Z_TYPE_P(haystack) == IS_UNICODE) {
+ if (Z_LVAL_P(needle) < 0 || Z_LVAL_P(needle) >
0x10FFFF) {
+ php_error(E_WARNING, "Needle argument codepoint
value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ if (U_IS_BMP(Z_LVAL_P(needle))) {
+ u_needle_char[needle_len++] =
(UChar)Z_LVAL_P(needle);
+ u_needle_char[needle_len] = 0;
+ } else {
+ u_needle_char[needle_len++] =
(UChar)U16_LEAD(Z_LVAL_P(needle));
+ u_needle_char[needle_len++] =
(UChar)U16_TRAIL(Z_LVAL_P(needle));
+ u_needle_char[needle_len] = 0;
+ }
+ target = u_needle_char;
+ } else {
+ needle_char[needle_len++] = (char)Z_LVAL_P(needle);
+ needle_char[needle_len] = 0;
+ target = needle_char;
+ }
+ }
+
+ if (needle_len > Z_UNILEN_P(haystack)) {
+ RETURN_FALSE;
+ }
- found = php_stristr(haystack,
- needle_char,
- haystack_len,
- 1);
+ if (Z_TYPE_P(haystack) == IS_UNICODE) {
+ found = php_u_stristr(Z_USTRVAL_P(haystack), (UChar *)target,
+
Z_USTRLEN_P(haystack), needle_len);
+ } else {
+ haystack_copy = estrndup(Z_STRVAL_P(haystack),
Z_STRLEN_P(haystack));
+ found = php_stristr(Z_STRVAL_P(haystack), (char *)target,
+ Z_STRLEN_P(haystack),
needle_len);
}
if (found) {
- found_offset = found - haystack;
- if (part) {
- char *ret;
- ret = emalloc(found_offset + 1);
- strncpy(ret, haystack_orig, found_offset);
- ret[found_offset] = '\0';
- RETVAL_STRINGL(ret , found_offset, 0);
- } else {
- RETVAL_STRINGL(haystack_orig + found_offset,
haystack_len - found_offset, 1);
+ if (Z_TYPE_P(haystack) == IS_UNICODE) {
+ start = part ? Z_USTRVAL_P(haystack) : found;
+ end = part ? found : (Z_USTRVAL_P(haystack) +
Z_USTRLEN_P(haystack));
+ RETVAL_UNICODEL((UChar *)start, (UChar *)end-(UChar
*)start, 1);
+ } else {
+ found_offset = (char *)found - Z_STRVAL_P(haystack);
+ start = part ? haystack_copy : haystack_copy +
found_offset;
+ end = part ? haystack_copy + found_offset :
(haystack_copy + Z_STRLEN_P(haystack));
+ if (Z_TYPE_P(haystack) == IS_BINARY) {
+ RETVAL_BINARYL((char *)start, (char *)end-(char
*)start, 1);
+ } else {
+ RETVAL_STRINGL((char *)start, (char *)end-(char
*)start, 1);
+ }
}
} else {
RETVAL_FALSE;
}
- efree(haystack_orig);
+ if (Z_TYPE_P(haystack) != IS_UNICODE) {
+ efree(haystack_copy);
+ }
}
/* }}} */
http://cvs.php.net/diff.php/php-src/ext/standard/php_string.h?r1=1.91&r2=1.92&ty=u
Index: php-src/ext/standard/php_string.h
diff -u php-src/ext/standard/php_string.h:1.91
php-src/ext/standard/php_string.h:1.92
--- php-src/ext/standard/php_string.h:1.91 Thu Oct 6 17:40:30 2005
+++ php-src/ext/standard/php_string.h Thu Oct 20 15:25:54 2005
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_string.h,v 1.91 2005/10/06 21:40:30 fmk Exp $ */
+/* $Id: php_string.h,v 1.92 2005/10/20 19:25:54 rolland Exp $ */
/* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */
@@ -130,6 +130,7 @@
PHPAPI void php_stripcslashes(char *str, int *len);
PHPAPI void php_basename(char *s, size_t len, char *suffix, size_t sufflen,
char **p_ret, size_t *p_len TSRMLS_DC);
PHPAPI size_t php_dirname(char *str, size_t len);
+PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len);
PHPAPI char *php_stristr(unsigned char *s, unsigned char *t, size_t s_len,
size_t t_len);
PHPAPI char *php_str_to_str_ex(char *haystack, int length, char *needle,
int needle_len, char *str, int str_len, int *_new_length, int
case_sensitivity, int *replace_count);
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php