[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Oct 24 10:35:05 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of strrchr() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.503r2=1.504ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.503 php-src/ext/standard/string.c:1.504 --- php-src/ext/standard/string.c:1.503 Sat Oct 22 09:36:55 2005 +++ php-src/ext/standard/string.c Mon Oct 24 10:35:02 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.503 2005/10/22 13:36:55 rolland Exp $ */ +/* $Id: string.c,v 1.504 2005/10/24 14:35:02 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2714,30 +2714,82 @@ } /* }}} */ +/* {{{ php_u_strrchr + */ +UChar *php_u_strrchr(UChar *s, UChar32 ch, int32_t s_len) +{ + UChar32 ch1; + int32_t i = s_len; + + while (i 0) { + U16_PREV(s, 0, i, ch1); + if (ch1 == ch) { + return (s+i); + } + } + return NULL; +} +/* }}} */ + /* {{{ proto string strrchr(string haystack, string needle) Finds the last occurrence of a character in a string within another */ PHP_FUNCTION(strrchr) { - zval **haystack, **needle; - char *found = NULL; - long found_offset; - - if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, haystack, needle) == - FAILURE) { + zval *haystack, *needle; + zend_uchar str_type; + UChar32 ch; + void *found = NULL; + int32_t found_offset; + + if (ZEND_NUM_ARGS() != 2 || zend_parse_parameters(2 TSRMLS_CC, zz, haystack, needle) == FAILURE) { WRONG_PARAM_COUNT; } - convert_to_string_ex(haystack); + if (Z_TYPE_P(haystack) != IS_UNICODE || Z_TYPE_P(haystack) != IS_BINARY || Z_TYPE_P(haystack) != IS_STRING) { + convert_to_string(haystack); + } - if (Z_TYPE_PP(needle) == IS_STRING) { - found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle)); + if (Z_TYPE_P(needle) == IS_UNICODE || Z_TYPE_P(needle) == IS_BINARY || Z_TYPE_P(needle) == IS_STRING) { + if (Z_TYPE_P(needle) != Z_TYPE_P(haystack)) { + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_P(haystack), 
Z_TYPE_P(needle)); + if (str_type == (zend_uchar)-1) { + convert_to_explicit_type(haystack, IS_BINARY); + convert_to_explicit_type(needle, IS_BINARY); + } else { + convert_to_explicit_type(haystack, str_type); + convert_to_explicit_type(needle, str_type); + } + } + if (Z_TYPE_P(haystack) == IS_UNICODE) { + U16_GET(Z_USTRVAL_P(needle), 0, 0, Z_USTRLEN_P(needle), ch); + found = php_u_strrchr(Z_USTRVAL_P(haystack), ch, Z_USTRLEN_P(haystack)); + } else { + found = strrchr(Z_STRVAL_P(haystack), *Z_STRVAL_P(needle)); + } } else { - convert_to_long_ex(needle); - found = strrchr(Z_STRVAL_PP(haystack), (char) Z_LVAL_PP(needle)); + convert_to_long(needle); + if (Z_TYPE_P(haystack) == IS_UNICODE) { + if (Z_LVAL_P(needle) 0 || Z_LVAL_P(needle) 0x10) { + php_error(E_WARNING, Needle argument codepoint value out of range (0 - 0x10)); + RETURN_FALSE; + } + found = php_u_strrchr(Z_USTRVAL_P(haystack), (UChar32)Z_LVAL_P(needle), Z_USTRLEN_P(haystack)); + } else { + found = strrchr(Z_STRVAL_P(haystack), (char)Z_LVAL_P(needle)); + } } if (found) { - found_offset = found - Z_STRVAL_PP(haystack); - RETURN_STRINGL(found, Z_STRLEN_PP(haystack) - found_offset, 1); + if (Z_TYPE_P(haystack) == IS_UNICODE) { + found_offset = (UChar *)found - Z_USTRVAL_P(haystack); + RETURN_UNICODEL((UChar *)found, Z_USTRLEN_P(haystack) - found_offset, 1); + } else { + found_offset = (char *)found - Z_STRVAL_P(haystack); + if (Z_TYPE_P(haystack) == IS_BINARY) { + RETURN_BINARYL((char *)found, Z_BINLEN_P(haystack) - found_offset, 1); + } else { + RETURN_STRINGL((char *)found, Z_STRLEN_P(haystack) - found_offset, 1); + } + } } else { RETURN_FALSE; } -- PHP CVS Mailing List (http://www.php.net/)
[PHP-CVS] cvs: php-src / unicode-progress.txt
rolland Mon Oct 24 10:51:18 2005 EDT Modified files: /php-src unicode-progress.txt Log: - strip_tags(), str[c]spn(), strpbrk(), strrchr(), strrpos(), strtok() http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.10&r2=1.11&ty=u Index: php-src/unicode-progress.txt diff -u php-src/unicode-progress.txt:1.10 php-src/unicode-progress.txt:1.11 --- php-src/unicode-progress.txt:1.10 Mon Oct 3 02:19:51 2005 +++ php-src/unicode-progress.txt Mon Oct 24 10:51:17 2005 @@ -17,16 +17,23 @@ similar_text() str_pad() str_repeat() +strip_tags() +strcspn() stripslashes() +strpbrk() strpos() +strrchr() strrev() +strrpos() +strspn() strstr() +strtok() substr() substr_count() substr_replace() trim() -ucwords() ucfirst() +ucwords() Zend Engine -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
Re: [PHP-CVS] cvs: php-src /ext/standard string.c
--- Jani Taskinen [EMAIL PROTECTED] wrote: What's a codept ?? --Jani Ah, that would be codepoint On Sat, 22 Oct 2005, Rolland Santimano wrote: rolland Sat Oct 22 01:52:55 2005 EDT Modified files: /php-src/ext/standardstring.c Log: - php_u_stristr: Code comments http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.499r2=1.500ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.499 php-src/ext/standard/string.c:1.500 --- php-src/ext/standard/string.c:1.499 Thu Oct 20 15:25:52 2005 +++ php-src/ext/standard/string.c Sat Oct 22 01:52:53 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.499 2005/10/20 19:25:52 rolland Exp $ */ +/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1924,20 +1924,33 @@ /* Have to do this by hand since lower-casing can change lengths by changing codepoints, and an offset within the lower-case - upper-case strings might be different codepoints + upper-case strings might be different codepoints. + + Find an occurrence of the first codept of 't' in 's', and + starting from this point, match the rest of the codepts of 't' + with those in 's'. Comparisons are performed against lower-case + equivalents of the codepoints being matched. + + 'i' 'j' are indices used for extracting codepts 'ch1' + 'ch2'. 'last' is offset in 's' where the search for 't' + started, and indicates beginning of 't' in 's' for a successful + match. */ + -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Sat Oct 22 09:25:02 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of strpbrk() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.500r2=1.501ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.500 php-src/ext/standard/string.c:1.501 --- php-src/ext/standard/string.c:1.500 Sat Oct 22 01:52:53 2005 +++ php-src/ext/standard/string.c Sat Oct 22 09:25:00 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */ +/* $Id: string.c,v 1.501 2005/10/22 13:25:00 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -6585,11 +6585,14 @@ Search a string for any of a set of characters */ PHP_FUNCTION(strpbrk) { - char *haystack, *char_list; - int haystack_len, char_list_len; - char *p; + void *haystack, *char_list; + int32_t haystack_len, char_list_len; + zend_uchar haystack_type, char_list_type; + void *p = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ss, haystack, haystack_len, char_list, char_list_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, TT, + haystack, haystack_len, haystack_type, + char_list, char_list_len, char_list_type) == FAILURE) { RETURN_FALSE; } @@ -6598,8 +6601,36 @@ RETURN_FALSE; } - if ((p = strpbrk(haystack, char_list))) { - RETURN_STRINGL(p, (haystack + haystack_len - p), 1); + if (haystack_type == IS_UNICODE) { + int32_t i, j; + UChar32 ch1, ch2; + + for (i = 0 ; i haystack_len ; ) { + U16_NEXT((UChar *)haystack, i, haystack_len, ch1); + for (j = 0 ; j char_list_len ; ) { + U16_NEXT((UChar *)char_list, j, char_list_len, ch2); + if (ch1 == ch2) { + U16_BACK_1((UChar *)haystack, 0, i); + p = (UChar *)haystack + i; + break; + } + } + if (ch1 == ch2) { + break; + } + } + } else { + p = strpbrk((char *)haystack, (char *)char_list); + } + + if (p) { + if (haystack_type == IS_UNICODE) { + RETURN_UNICODEL((UChar *)p, ((UChar *)haystack + haystack_len - (UChar *)p), 1); + } else if 
(haystack_type == IS_BINARY) { + RETURN_BINARYL((char *)p, ((char *)haystack + haystack_len - (char *)p), 1); + } else { + RETURN_STRINGL((char *)p, ((char *)haystack + haystack_len - (char *)p), 1); + } } else { RETURN_FALSE; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Sat Oct 22 09:32:52 2005 EDT Modified files: /php-src/ext/standard string.c Log: - php_u_stristr: s/codepts/codepoints/ - make Jani happy http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.501r2=1.502ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.501 php-src/ext/standard/string.c:1.502 --- php-src/ext/standard/string.c:1.501 Sat Oct 22 09:25:00 2005 +++ php-src/ext/standard/string.c Sat Oct 22 09:32:51 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.501 2005/10/22 13:25:00 rolland Exp $ */ +/* $Id: string.c,v 1.502 2005/10/22 13:32:51 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1917,7 +1917,7 @@ /* {{{ php_u_stristr Unicode version of case insensitve strstr */ -PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len) +\PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len) { int32_t i,j, last; UChar32 ch1, ch2; @@ -1926,12 +1926,12 @@ by changing codepoints, and an offset within the lower-case upper-case strings might be different codepoints. - Find an occurrence of the first codept of 't' in 's', and - starting from this point, match the rest of the codepts of 't' - with those in 's'. Comparisons are performed against lower-case - equivalents of the codepoints being matched. + Find an occurrence of the first codepoint of 't' in 's', and + starting from this point, match the rest of the codepoints of + 't' with those in 's'. Comparisons are performed against + lower-case equivalents of the codepoints being matched. - 'i' 'j' are indices used for extracting codepts 'ch1' + 'i' 'j' are indices used for extracting codepoints 'ch1' 'ch2'. 'last' is offset in 's' where the search for 't' started, and indicates beginning of 't' in 's' for a successful match. -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Sat Oct 22 09:36:57 2005 EDT Modified files: /php-src/ext/standard string.c Log: - php_u_stristr: Remove leading back-slash ... sheesh http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.502r2=1.503ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.502 php-src/ext/standard/string.c:1.503 --- php-src/ext/standard/string.c:1.502 Sat Oct 22 09:32:51 2005 +++ php-src/ext/standard/string.c Sat Oct 22 09:36:55 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.502 2005/10/22 13:32:51 rolland Exp $ */ +/* $Id: string.c,v 1.503 2005/10/22 13:36:55 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1917,7 +1917,7 @@ /* {{{ php_u_stristr Unicode version of case insensitve strstr */ -\PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len) +PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len) { int32_t i,j, last; UChar32 ch1, ch2; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Sat Oct 22 01:52:55 2005 EDT Modified files: /php-src/ext/standard string.c Log: - php_u_stristr: Code comments http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.499r2=1.500ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.499 php-src/ext/standard/string.c:1.500 --- php-src/ext/standard/string.c:1.499 Thu Oct 20 15:25:52 2005 +++ php-src/ext/standard/string.c Sat Oct 22 01:52:53 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.499 2005/10/20 19:25:52 rolland Exp $ */ +/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1924,20 +1924,33 @@ /* Have to do this by hand since lower-casing can change lengths by changing codepoints, and an offset within the lower-case - upper-case strings might be different codepoints + upper-case strings might be different codepoints. + + Find an occurrence of the first codept of 't' in 's', and + starting from this point, match the rest of the codepts of 't' + with those in 's'. Comparisons are performed against lower-case + equivalents of the codepoints being matched. + + 'i' 'j' are indices used for extracting codepts 'ch1' + 'ch2'. 'last' is offset in 's' where the search for 't' + started, and indicates beginning of 't' in 's' for a successful + match. */ + i = 0; while (i = (s_len-t_len)) { last = i; U16_NEXT(s, i, s_len, ch1); - U16_GET(t, 0, 0, t_len, ch2); + j = 0; + U16_NEXT(t, j, t_len, ch2); if (u_tolower(ch1) == u_tolower(ch2)) { - j = 0; - U16_FWD_1(t, j, t_len); while (j t_len) { U16_NEXT(s, i, s_len, ch1); U16_NEXT(t, j, t_len, ch2); if (u_tolower(ch1) != u_tolower(ch2)) { + /* U16_NEXT() incr 'i' beyond 'ch1', re-adjust to + restart compare + */ U16_BACK_1(s, 0, i); break; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Oct 19 15:10:21 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of str[c]spn() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.497r2=1.498ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.497 php-src/ext/standard/string.c:1.498 --- php-src/ext/standard/string.c:1.497 Mon Oct 17 15:50:13 2005 +++ php-src/ext/standard/string.c Wed Oct 19 15:10:13 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.497 2005/10/17 19:50:13 rolland Exp $ */ +/* $Id: string.c,v 1.498 2005/10/19 19:10:13 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -209,14 +209,16 @@ static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) { - char *s11, *s22; - int len1, len2; - long start, len; + void *s1, *s2; + int32_t len1, len2; + zend_uchar type1, type2; + long start, len; /* For UNICODE, these are codepoint units */ start = 0; len = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ss|ll, s11, len1, - s22, len2, start, len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, TT|ll, + s1, len1, type1, s2, len2, type2, + start, len) == FAILURE) { return; } @@ -246,18 +248,40 @@ len = len1 - start; } - if (behavior == STR_STRSPN) { - RETURN_LONG(php_strspn(s11 + start /*str1_start*/, - s22 /*str2_start*/, - s11 + start + len /*str1_end*/, - s22 + len2 /*str2_end*/)); - } else if (behavior == STR_STRCSPN) { - RETURN_LONG(php_strcspn(s11 + start /*str1_start*/, - s22 /*str2_start*/, - s11 + start + len /*str1_end*/, - s22 + len2 /*str2_end*/)); + if (type1 == IS_UNICODE) { + UChar *u_start, *u_end; + int32_t i = 0; + + U16_FWD_N((UChar*)s1, i, len1, start); + u_start = (UChar *)s1 + i; + U16_FWD_N((UChar *)s1, i, len1, len); + u_end = (UChar *)s1 + i; + + if (behavior == STR_STRSPN) { + RETURN_LONG(php_u_strspn(u_start /*str1_start*/, +(UChar *)s2 /*str2_start*/, +u_end /*str1_end*/, +(UChar *)s2 + len2 /*str2_end*/)); + } else if 
(behavior == STR_STRCSPN) { + RETURN_LONG(php_u_strcspn(u_start /*str1_start*/, + (UChar *)s2 /*str2_start*/, + u_end /*str1_end*/, + (UChar *)s2 + len2 /*str2_end*/)); + } + } else { + if (behavior == STR_STRSPN) { + RETURN_LONG(php_strspn((char *)s1 + start /*str1_start*/, + (char *)s2 /*str2_start*/, + (char *)s1 + start + len /*str1_end*/, + (char *)s2 + len2 /*str2_end*/)); + } else if (behavior == STR_STRCSPN) { + RETURN_LONG(php_strcspn((char *)s1 + start /*str1_start*/, + (char *)s2 /*str2_start*/, + (char *)s1 + start + len /*str1_end*/, + (char *)s2 + len2 /*str2_end*/)); + } } - + } /* {{{ proto int strspn(string str, string mask [, start [, len]]) @@ -1901,6 +1925,25 @@ } /* }}} */ +/* {{{ php_u_strspn + */ +PHPAPI int32_t php_u_strspn(UChar *s1, UChar *s2, UChar *s1_end, UChar *s2_end) +{ + int32_t len1 = s1_end - s1; + int32_t len2 = s2_end - s2; + int32_t i, codepts; + UChar32 ch; + + for (i = 0, codepts = 0 ; i len1 ; codepts++) { + U16_NEXT(s1, i, len1, ch); + if (u_memchr32(s2, ch, len2) == NULL) { + break; +
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Oct 17 13:07:46 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of strrpos() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.495r2=1.496ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.495 php-src/ext/standard/string.c:1.496 --- php-src/ext/standard/string.c:1.495 Sat Oct 15 08:50:20 2005 +++ php-src/ext/standard/string.c Mon Oct 17 13:07:44 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.495 2005/10/15 12:50:20 derick Exp $ */ +/* $Id: string.c,v 1.496 2005/10/17 17:07:44 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2292,61 +2292,118 @@ Finds position of last occurrence of a string within another string */ PHP_FUNCTION(strrpos) { - zval *zneedle; - char *needle, *haystack; - int needle_len, haystack_len; + zval *zhaystack, *zneedle; + void *haystack, *needle; + int32_t haystack_len, needle_len = 0; + zend_uchar str_type; long offset = 0; char *p, *e, ord_needle[2]; + UChar *pos, *u_p, *u_e, u_ord_needle[3]; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, sz|l, haystack, haystack_len, zneedle, offset) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, zz|l, + zhaystack, zneedle, offset) == FAILURE) { RETURN_FALSE; } - if (Z_TYPE_P(zneedle) == IS_STRING) { - needle = Z_STRVAL_P(zneedle); - needle_len = Z_STRLEN_P(zneedle); + if (Z_TYPE_P(zhaystack) != IS_UNICODE Z_TYPE_P(zhaystack) != IS_BINARY Z_TYPE_P(zhaystack) != IS_STRING) { + convert_to_text(zhaystack); + } + if (Z_TYPE_P(zneedle) == IS_UNICODE || Z_TYPE_P(zneedle) == IS_BINARY || Z_TYPE_P(zneedle) == IS_STRING) { + if (Z_TYPE_P(zneedle) != Z_TYPE_P(zhaystack)) { + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_P(zhaystack), Z_TYPE_P(zneedle)); + if (str_type == (zend_uchar)-1) { + convert_to_explicit_type(zhaystack, IS_BINARY); + convert_to_explicit_type(zneedle, IS_BINARY); + } else { + convert_to_explicit_type(zhaystack, str_type); + 
convert_to_explicit_type(zneedle, str_type); + } + } + needle = Z_UNIVAL_P(zneedle); + needle_len = Z_UNILEN_P(zneedle); } else { - convert_to_long(zneedle); - ord_needle[0] = (char)(Z_LVAL_P(zneedle) 0xFF); - ord_needle[1] = '\0'; - needle = ord_needle; - needle_len = 1; + if (Z_TYPE_P(zhaystack) == IS_UNICODE) { + if (Z_LVAL_P(zneedle) 0 || Z_LVAL_P(zneedle) 0x10) { + php_error(E_WARNING, Needle argument codepoint value out of range (0 - 0x10)); + RETURN_FALSE; + } + if (U_IS_BMP(Z_LVAL_P(zneedle))) { + u_ord_needle[needle_len++] = (UChar)Z_LVAL_P(zneedle); + u_ord_needle[needle_len] = 0; + } else { + u_ord_needle[needle_len++] = (UChar)U16_LEAD(Z_LVAL_P(zneedle)); + u_ord_needle[needle_len++] = (UChar)U16_TRAIL(Z_LVAL_P(zneedle)); + u_ord_needle[needle_len] = 0; + } + needle = u_ord_needle; + } else { + convert_to_long(zneedle); + ord_needle[0] = (char)(Z_LVAL_P(zneedle) 0xFF); + ord_needle[1] = '\0'; + needle = ord_needle; + needle_len = 1; + } } + haystack = Z_UNIVAL_P(zhaystack); + haystack_len = Z_UNILEN_P(zhaystack); if ((haystack_len == 0) || (needle_len == 0)) { RETURN_FALSE; } - if (offset = 0) { - p = haystack + offset; - e = haystack + haystack_len - needle_len; + if (Z_TYPE_P(zhaystack) == IS_UNICODE) { + if (offset = 0) { + u_p = (UChar *)haystack + offset; + u_e = (UChar *)haystack + haystack_len - needle_len; + } else { + u_p = haystack; + if (-offset haystack_len) { + u_e = (UChar *)haystack - needle_len; + } else if (needle_len -offset) { +
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Oct 17 15:50:19 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of stripos() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.496r2=1.497ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.496 php-src/ext/standard/string.c:1.497 --- php-src/ext/standard/string.c:1.496 Mon Oct 17 13:07:44 2005 +++ php-src/ext/standard/string.c Mon Oct 17 15:50:13 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.496 2005/10/17 17:07:44 rolland Exp $ */ +/* $Id: string.c,v 1.497 2005/10/17 19:50:13 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2228,51 +2228,109 @@ Finds position of first occurrence of a string within another, case insensitive */ PHP_FUNCTION(stripos) { - char *found = NULL; - char *haystack; - int haystack_len; + zval *haystack, *needle; long offset = 0; - char *needle_dup = NULL, *haystack_dup; + int32_t haystack_len, needle_len = 0; + zend_uchar str_type; + void *haystack_dup, *needle_dup = NULL; char needle_char[2]; - zval *needle; + char c; + UChar u_needle_char[3]; + UChar32 ch; + void *found = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, sz|l, haystack, haystack_len, needle, offset) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, zz|l, haystack, needle, offset) == FAILURE) { return; } - if (offset 0 || offset haystack_len) { + if (Z_TYPE_P(haystack) != IS_UNICODE Z_TYPE_P(haystack) != IS_BINARY Z_TYPE_P(haystack) != IS_STRING) { + convert_to_text(haystack); + } + if (offset 0 || offset Z_UNILEN_P(haystack)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset not contained in string.); RETURN_FALSE; } - haystack_dup = estrndup(haystack, haystack_len); - php_strtolower(haystack_dup, haystack_len); - - if (Z_TYPE_P(needle) == IS_STRING) { - needle_dup = estrndup(Z_STRVAL_P(needle), Z_STRLEN_P(needle)); - php_strtolower(needle_dup, Z_STRLEN_P(needle)); - found = php_memnstr(haystack_dup + offset, needle_dup, 
Z_STRLEN_P(needle), haystack_dup + haystack_len); + if (Z_TYPE_P(needle) == IS_UNICODE || Z_TYPE_P(needle) == IS_BINARY || Z_TYPE_P(needle) == IS_STRING) { + if (!Z_UNILEN_P(needle)) { + RETURN_FALSE; + } + if (Z_TYPE_P(haystack) != Z_TYPE_P(needle)) { + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_P(haystack), Z_TYPE_P(needle)); + if (str_type == (zend_uchar)-1) { + convert_to_explicit_type(haystack, IS_BINARY); + convert_to_explicit_type(needle, IS_BINARY); + } else { + convert_to_explicit_type(haystack, str_type); + convert_to_explicit_type(needle, str_type); + } + } + haystack_len = Z_UNILEN_P(haystack); + needle_len = Z_UNILEN_P(needle); + if (Z_TYPE_P(haystack) == IS_UNICODE) { + haystack_dup = eustrndup(Z_USTRVAL_P(haystack), haystack_len); + php_u_strtolower((UChar **)haystack_dup, haystack_len, UG(default_locale)); + needle_dup = eustrndup(Z_STRVAL_P(needle), needle_len); + php_u_strtolower((UChar **)needle_dup, needle_len, UG(default_locale)); + found = zend_u_memnstr((UChar *)haystack_dup + offset, + (UChar *)needle_dup, needle_len, + (UChar *)haystack_dup + haystack_len); + } else { + haystack_dup = estrndup(Z_STRVAL_P(haystack), haystack_len); + php_strtolower((char *)haystack_dup, haystack_len); + needle_dup = estrndup(Z_STRVAL_P(needle), needle_len); + php_strtolower((char *)needle_dup, Z_STRLEN_P(needle)); + found = php_memnstr((char *)haystack_dup + offset, + (char *)needle_dup, needle_len, + (char *)haystack_dup + haystack_len); + } } else { switch (Z_TYPE_P(needle)) { case IS_LONG: case IS_BOOL: - needle_char[0] = tolower((char) Z_LVAL_P(needle)); +
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Oct 5 08:16:03 2005 EDT Modified files: /php-src/ext/standard string.c Log: - strip_tags(): some fixes, still AWiP http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.492r2=1.493ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.492 php-src/ext/standard/string.c:1.493 --- php-src/ext/standard/string.c:1.492 Tue Oct 4 02:33:10 2005 +++ php-src/ext/standard/string.c Wed Oct 5 08:16:02 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.492 2005/10/04 06:33:10 dmitry Exp $ */ +/* $Id: string.c,v 1.493 2005/10/05 12:16:02 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -4803,8 +4803,8 @@ Strips HTML and PHP tags from a string */ PHP_FUNCTION(strip_tags) { - void *str, *allow=NULL; - int32_t str_len, allow_len; + void *str, *allow = NULL; + int32_t str_len, allow_len = 0; zend_uchar str_type, allow_type; void *buf; int32_t retval_len; @@ -4982,7 +4982,8 @@ int php_u_tag_find(UChar *tag, int32_t len, UChar *set, int32_t set_len) { int32_t idx = 0; - UChar32 ch, *norm, *n; + UChar32 ch; + UChar *norm, *n; int state = 0, done = 0; if (!len) { @@ -5018,9 +5019,9 @@ } } *(n++) = ''; - *n = '\0'; + *n = 0; - if (u_strFindFirst(tag, len, set, set_len) != NULL) { + if (u_strFindFirst(set, set_len, norm, n-norm) != NULL) { done = 1; } else { done = 0; @@ -5121,7 +5122,7 @@ buf = eustrndup(rbuf, len); rp = rbuf; - if (allow) { + if (allow_len != 0) { php_u_strtolower(allow, allow_len, UG(default_locale)); tbuf = eumalloc(PHP_TAG_BUF_SIZE+1); tp = tbuf; @@ -5147,7 +5148,7 @@ if (state == 0) { last = 0x3C; state = 1; - if (allow) { + if (allow_len) { tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp); *(tp++) = ch; } @@ -5162,7 +5163,7 @@ last = 0x28; br++; } - } else if (allow state == 1) { + } else if (allow_len state == 1) { tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? 
tbuf: tp); *(tp++) = ch; } else if (state == 0) { @@ -5176,7 +5177,7 @@ last = ch; br--; } - } else if (allow state == 1) { + } else if (allow_len state == 1) { tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp); *(tp++) = ch; } else if (state == 0) { @@ -5194,7 +5195,7 @@ case 1: /* HTML/XML */ last = ch; state = 0; - if (allow) { + if (allow_len) { tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp); *(tp++) = ch; *(tp) = 0; @@ -5241,7 +5242,7 @@ } } else if (state == 0) { *(rp++) = ch; - } else if (allow state == 1) { + } else if (allow_len state == 1) { tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp); *(tp++) = ch; } @@ -5254,10 +5255,10 @@ last = ch; } else { if (state == 0) { - (*rp++) = 0x21; - } else if (allow state == 1) { - tp = ((tp-tbuf) = PHP_TAG_BUF_SIZE ? tbuf: tp); - *(tp++) = 0x21; + *(rp++) = ch; + } else if (allow_len state == 1) { + tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp); + *(tp++) = ch; } } break; @@ -5310,7 +5311,7 @@ reg_u_char:
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Oct 3 02:14:12 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of similar_text() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.489r2=1.490ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.489 php-src/ext/standard/string.c:1.490 --- php-src/ext/standard/string.c:1.489 Fri Sep 30 09:19:15 2005 +++ php-src/ext/standard/string.c Mon Oct 3 02:14:10 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.489 2005/09/30 13:19:15 rolland Exp $ */ +/* $Id: string.c,v 1.490 2005/10/03 06:14:10 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3410,6 +3410,42 @@ } /* }}} */ +/* {{{ php_u_similar_str + */ +static void php_u_similar_str(const UChar *txt1, int32_t len1, + const UChar *txt2, int32_t len2, + int32_t *pos1, int32_t *end1, + int32_t *pos2, int32_t *end2, int *max) +{ + int32_t i1, i2, j1, j2, l; + UChar32 ch1, ch2; + + *max = 0; + for (i1 = 0 ; i1 len1 ; ) { + for (i2 = 0 ; i2 len2 ; ) { + l = 0 ; j1 = 0 ; j2 = 0; + while ((i1+j1 len1) (i2+j2 len2)) { + U16_NEXT(txt1+i1, j1, len1-i1, ch1); + U16_NEXT(txt2+i2, j2, len2-i2, ch2); + if (ch1 != ch2) { + U16_BACK_1(txt1+i1, 0, j1); + U16_BACK_1(txt2+i2, 0, j2); + break; + } + l++; + } + if (l *max) { + *max = l; + *pos1 = i1; *end1 = j1; + *pos2 = i2; *end2 = j2; + } + U16_FWD_1(txt2, i2, len2); + } + U16_FWD_1(txt1, i1, len1); + } +} +/* }}} */ + /* {{{ php_similar_str */ static void php_similar_str(const char *txt1, int len1, const char *txt2, int len2, int *pos1, int *pos2, int *max) @@ -3433,6 +3469,27 @@ } /* }}} */ +/* {{{ php_u_similar_char + */ +static int php_u_similar_char(const UChar *txt1, int32_t len1, const UChar *txt2, int32_t len2) +{ + int sum, max; + int32_t pos1, pos2, end1, end2; + + php_u_similar_str(txt1, len1, txt2, len2, pos1, end1, pos2, end2, max); + if ((sum = max)) { + if (pos1 pos2) { + sum += php_u_similar_char(txt1, pos1, txt2, pos2); + } + if ((pos1 + end1 len1) (pos2 + end2 
len2)) { + sum += php_similar_char((UChar *)txt1+pos1+end1, len1-pos1-end1, + (UChar *)txt2+pos2+end2, len2-pos2-end2); + } + } + return sum; +} +/* }}} */ + /* {{{ php_similar_char */ static int php_similar_char(const char *txt1, int len1, const char *txt2, int len2) @@ -3463,30 +3520,49 @@ zval **t1, **t2, **percent; int ac = ZEND_NUM_ARGS(); int sim; - + zend_uchar str_type; + if (ac 2 || ac 3 || zend_get_parameters_ex(ac, t1, t2, percent) == FAILURE) { WRONG_PARAM_COUNT; } - - convert_to_string_ex(t1); - convert_to_string_ex(t2); - + if (Z_TYPE_PP(t1) != IS_UNICODE Z_TYPE_PP(t1) != IS_BINARY Z_TYPE_PP(t1) != IS_STRING) { + convert_to_text_ex(t1); + } + if (Z_TYPE_PP(t2) != IS_UNICODE Z_TYPE_PP(t2) != IS_BINARY Z_TYPE_PP(t2) != IS_STRING) { + convert_to_text_ex(t2); + } + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(t1), Z_TYPE_PP(t2)); + if (str_type == (zend_uchar)-1) { + convert_to_binary_ex(t1); + convert_to_binary_ex(t2); + } else { + convert_to_explicit_type_ex(t1, str_type); + convert_to_explicit_type_ex(t2, str_type); + } if (ac 2) { convert_to_double_ex(percent); } - - if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) { + + if (Z_UNILEN_PP(t1) + Z_UNILEN_PP(t2) == 0) { if (ac 2) { Z_DVAL_PP(percent) = 0; } RETURN_LONG(0); } - - sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), Z_STRVAL_PP(t2), Z_STRLEN_PP(t2)); + + if (str_type == IS_UNICODE) { + sim = php_u_similar_char(Z_USTRVAL_PP(t1), Z_USTRLEN_PP(t1), Z_USTRVAL_PP(t2),
[PHP-CVS] cvs: php-src / unicode-progress.txt
rolland Mon Oct 3 02:19:51 2005 EDT Modified files: /php-src unicode-progress.txt Log: levenshtein(), similar_text() http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.9&r2=1.10&ty=u Index: php-src/unicode-progress.txt diff -u php-src/unicode-progress.txt:1.9 php-src/unicode-progress.txt:1.10 --- php-src/unicode-progress.txt:1.9 Wed Sep 28 08:25:34 2005 +++ php-src/unicode-progress.txt Mon Oct 3 02:19:51 2005 @@ -11,8 +11,10 @@ chr() explode() implode() +levenshtein() ord() range() +similar_text() str_pad() str_repeat() stripslashes() -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Oct 3 13:00:05 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of strip_tags() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.490r2=1.491ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.490 php-src/ext/standard/string.c:1.491 --- php-src/ext/standard/string.c:1.490 Mon Oct 3 02:14:10 2005 +++ php-src/ext/standard/string.c Mon Oct 3 13:00:00 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.490 2005/10/03 06:14:10 rolland Exp $ */ +/* $Id: string.c,v 1.491 2005/10/03 17:00:00 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -4803,34 +4803,30 @@ Strips HTML and PHP tags from a string */ PHP_FUNCTION(strip_tags) { - char *buf; - zval **str, **allow=NULL; - char *allowed_tags=NULL; - int allowed_tags_len=0; - size_t retval_len; + void *str, *allow=NULL; + int32_t str_len, allow_len; + zend_uchar str_type, allow_type; + void *buf; + int32_t retval_len; - switch (ZEND_NUM_ARGS()) { - case 1: - if (zend_get_parameters_ex(1, str) == FAILURE) { - RETURN_FALSE; - } - break; - case 2: - if (zend_get_parameters_ex(2, str, allow) == FAILURE) { - RETURN_FALSE; - } - convert_to_string_ex(allow); - allowed_tags = Z_STRVAL_PP(allow); - allowed_tags_len = Z_STRLEN_PP(allow); - break; - default: - WRONG_PARAM_COUNT; - break; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, T|T, str, str_len, str_type, + allow, allow_len, allow_type) == FAILURE) { + return; + } + + if (str_type == IS_UNICODE) { + buf = eustrndup(str, str_len); + retval_len = php_u_strip_tags((UChar *)buf, str_len, NULL, (UChar *)allow, allow_len TSRMLS_CC); + RETURN_UNICODEL((UChar *)buf, retval_len, 0); + } else { + buf = estrndup(str, str_len); + retval_len = php_strip_tags((char *)buf, str_len, NULL, (char *)allow, allow_len); + if (str_type == IS_BINARY) { + RETURN_BINARYL((char *)buf, retval_len, 0); + } else { + RETURN_STRINGL((char *)buf, retval_len, 0); + } } - 
convert_to_string_ex(str); - buf = estrndup(Z_STRVAL_PP(str), Z_STRLEN_PP(str)); - retval_len = php_strip_tags(buf, Z_STRLEN_PP(str), NULL, allowed_tags, allowed_tags_len); - RETURN_STRINGL(buf, retval_len, 0); } /* }}} */ @@ -4971,15 +4967,72 @@ #define PHP_TAG_BUF_SIZE 1023 -/* {{{ php_tag_find +/* php_u_tag_find / php_tag_find * * Check if tag is in a set of tags * * states: - * + * * 0 start tag * 1 first non-whitespace char seen */ + +/* {{{ php_u_tag_find + */ +int php_u_tag_find(UChar *tag, int32_t len, UChar *set, int32_t set_len) +{ + int32_t idx = 0; + UChar32 ch, *norm, *n; + int state = 0, done = 0; + + if (!len) { + return 0; + } + + norm = eumalloc(len+1); + n = norm; + + while (!done) { + U16_NEXT(tag, idx, len, ch); + switch (u_tolower(ch)) { + case '': + *(n++) = ch; + break; + case '': + done = 1; + break; + default: + if (u_isWhitespace(ch) == FALSE) { + if (state == 0) { + state = 1; + if (ch != '/') + *(n++) = ch; + } else { + *(n++) = ch; + } + } else { + if (state == 1) + done = 1; + } + break; + } + } + *(n++) = ''; + *n = '\0'; + + if (u_strFindFirst(tag, len, set, set_len) != NULL) { + done = 1; + } else { + done = 0; + } + + efree(norm); + return done; +} +/* }}} */ + +/* {{{ php_tag_find + */ int php_tag_find(char *tag, int len, char *set) { char c, *n, *t; int state=0, done=0; @@ -5033,7 +5086,7 @@ } /* }}} */ -/* {{{ php_strip_tags +/* php_u_strip_tags /
[PHP-CVS] cvs: php-src /ext/standard levenshtein.c
rolland Fri Sep 30 02:20:49 2005 EDT Modified files: /php-src/ext/standard levenshtein.c Log: - Unicode impl of levenshtein() http://cvs.php.net/diff.php/php-src/ext/standard/levenshtein.c?r1=1.34r2=1.35ty=u Index: php-src/ext/standard/levenshtein.c diff -u php-src/ext/standard/levenshtein.c:1.34 php-src/ext/standard/levenshtein.c:1.35 --- php-src/ext/standard/levenshtein.c:1.34 Wed Aug 3 10:08:08 2005 +++ php-src/ext/standard/levenshtein.c Fri Sep 30 02:20:47 2005 @@ -15,7 +15,7 @@ | Author: Hartmut Holzgraefe [EMAIL PROTECTED]| +--+ */ -/* $Id: levenshtein.c,v 1.34 2005/08/03 14:08:08 sniper Exp $ */ +/* $Id: levenshtein.c,v 1.35 2005/09/30 06:20:47 rolland Exp $ */ #include php.h #include stdlib.h @@ -27,39 +27,58 @@ /* {{{ reference_levdist * reference implementation, only optimized for memory usage, not speed */ -static int reference_levdist(const char *s1, int l1, - const char *s2, int l2, - int cost_ins, int cost_rep, int cost_del ) +static int reference_levdist(void *s1, int32_t l1, void *s2, int32_t l2, zend_uchar str_type, int cost_ins, int cost_rep, int cost_del ) { int *p1, *p2, *tmp; - int i1, i2, c0, c1, c2; - - if(l1==0) return l2*cost_ins; - if(l2==0) return l1*cost_del; + int32_t i1, i2, j1, j2, cp1, cp2; + int32_t c0, c1, c2; + UChar32 ch1, ch2; + + if (str_type == IS_UNICODE) { + cp1 = u_countChar32((UChar *)s1, l1); + cp2 = u_countChar32((UChar *)s2, l2); + + if (cp1 == 0) return cp2*cost_ins; + if (cp2 == 0) return cp1*cost_del; + if ((cp1LEVENSHTEIN_MAX_LENTH)||(cp2LEVENSHTEIN_MAX_LENTH)) { + return -1; + } - if((l1LEVENSHTEIN_MAX_LENTH)||(l2LEVENSHTEIN_MAX_LENTH)) - return -1; + p1 = safe_emalloc((cp2+1), sizeof(int), 0); + p2 = safe_emalloc((cp2+1), sizeof(int), 0); + } else { + if (l1 == 0) return l2*cost_ins; + if (l2 == 0) return l1*cost_del; + if ((l1LEVENSHTEIN_MAX_LENTH)||(l2LEVENSHTEIN_MAX_LENTH)) { + return -1; + } - p1 = safe_emalloc((l2+1), sizeof(int), 0); - p2 = safe_emalloc((l2+1), sizeof(int), 0); + p1 = safe_emalloc((l2+1), 
sizeof(int), 0); + p2 = safe_emalloc((l2+1), sizeof(int), 0); + } - for(i2=0;i2=l2;i2++) + for (i2 = 0 ; i2 = l2 ; i2++) p1[i2] = i2*cost_ins; - for(i1=0;i1l1;i1++) - { - p2[0]=p1[0]+cost_del; - for(i2=0;i2l2;i2++) - { - c0=p1[i2]+((s1[i1]==s2[i2])?0:cost_rep); - c1=p1[i2+1]+cost_del; if(c1c0) c0=c1; - c2=p2[i2]+cost_ins; if(c2c0) c0=c2; - p2[i2+1]=c0; - } - tmp=p1; p1=p2; p2=tmp; + for (i1 = 0, j1 = 0 ; i1 l1 ; i1++) { + p2[0] = p1[0] + cost_del; + if (str_type == IS_UNICODE) { + U16_NEXT((UChar *)s1, j1, l1, ch1); } - - c0=p1[l2]; + for (i2 = 0, j2 = 0 ; i2 l2 ; i2++) { + if (str_type == IS_UNICODE) { + U16_NEXT((UChar *)s2, j2, l2, ch2); + c0 = p1[i2] + ((ch1==ch2) ? 0 : cost_rep); + } else { + c0 = p1[i2] + ((*((char *)s1+i1)==*((char *)s2+i2)) ? 0 : cost_rep); + } + c1 = p1[i2+1] + cost_del; if (c1 c0) c0 = c1; + c2 = p2[i2] + cost_ins; if (c2 c0) c0 = c2; + p2[i2+1] = c0; + } + tmp=p1; p1=p2; p2=tmp; + } + c0 = p1[l2]; efree(p1); efree(p2); @@ -70,7 +89,7 @@ /* {{{ custom_levdist */ -static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC) +static int custom_levdist(void *str1, void *str2, char *callback_name TSRMLS_DC) { php_error_docref(NULL TSRMLS_CC, E_WARNING, The general Levenshtein support is not there yet); /* not there yet */ @@ -83,56 +102,51 @@ Calculate Levenshtein distance between two strings */ PHP_FUNCTION(levenshtein) { - zval **str1, **str2, **cost_ins, **cost_rep, **cost_del,
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Fri Sep 30 09:19:19 2005 EDT Modified files: /php-src/ext/standard string.c Log: - substr_replace(): call correct funcn for string conversion http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.488&r2=1.489&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.488 php-src/ext/standard/string.c:1.489 --- php-src/ext/standard/string.c:1.488 Thu Sep 29 07:05:30 2005 +++ php-src/ext/standard/string.c Fri Sep 30 09:19:15 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.488 2005/09/29 11:05:30 rolland Exp $ */ +/* $Id: string.c,v 1.489 2005/09/30 13:19:15 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2800,11 +2800,11 @@ if (tmp_repl && Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) { str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(str), Z_TYPE_PP(tmp_repl)); if (str_type == (zend_uchar)-1) { - convert_to_explicit_type(str, IS_BINARY); - convert_to_explicit_type(tmp_repl, IS_BINARY); + convert_to_explicit_type_ex(str, IS_BINARY); + convert_to_explicit_type_ex(tmp_repl, IS_BINARY); } else { - convert_to_explicit_type(str, str_type); - convert_to_explicit_type(tmp_repl, str_type); + convert_to_explicit_type_ex(str, str_type); + convert_to_explicit_type_ex(tmp_repl, str_type); } } php_adjust_limits(str, &f, &l); @@ -2881,11 +2881,11 @@ if (tmp_repl && Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl)) { str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(tmp_str), Z_TYPE_PP(tmp_repl)); if (str_type == (zend_uchar)-1) { - convert_to_explicit_type(tmp_str, IS_BINARY); - convert_to_explicit_type(tmp_repl, IS_BINARY); + convert_to_explicit_type_ex(tmp_str, IS_BINARY); + convert_to_explicit_type_ex(tmp_repl, IS_BINARY); } else { - convert_to_explicit_type(tmp_str, str_type); - convert_to_explicit_type(tmp_repl, str_type); + convert_to_explicit_type_ex(tmp_str, str_type); + convert_to_explicit_type_ex(tmp_repl, str_type); } } php_adjust_limits(tmp_str, &f, &l); -- PHP CVS Mailing List 
(http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Thu Sep 29 05:33:41 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Updated addslashes(): add codepoints directly rather than with zend_codepoint_to_uchar() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.486&r2=1.487&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.486 php-src/ext/standard/string.c:1.487 --- php-src/ext/standard/string.c:1.486 Wed Sep 28 18:31:29 2005 +++ php-src/ext/standard/string.c Thu Sep 29 05:33:38 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.486 2005/09/28 22:31:29 iliaa Exp $ */ +/* $Id: string.c,v 1.487 2005/09/29 09:33:38 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3817,12 +3817,12 @@ U16_NEXT(str, i, length, ch); switch (ch) { case '\0': - buf_len += zend_codepoint_to_uchar('\\', buf+buf_len); - buf_len += zend_codepoint_to_uchar('0', buf+buf_len); + *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */ + *(buf+buf_len) = (UChar)0x30; buf_len++; /* 0 */ break; case '\'': - buf_len += zend_codepoint_to_uchar('\'', buf+buf_len); - buf_len += zend_codepoint_to_uchar('\'', buf+buf_len); + *(buf+buf_len) = (UChar)0x27; buf_len++; /* ' */ + *(buf+buf_len) = (UChar)0x27; buf_len++; /* ' */ break; default: buf_len += zend_codepoint_to_uchar(ch, buf+buf_len); @@ -3834,13 +3834,13 @@ U16_NEXT(str, i, length, ch); switch (ch) { case '\0': - buf_len += zend_codepoint_to_uchar('\\', buf+buf_len); - buf_len += zend_codepoint_to_uchar('0', buf+buf_len); + *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */ + *(buf+buf_len) = (UChar)0x30; buf_len++; /* 0 */ break; case '\'': case '\"': case '\\': - buf_len += zend_codepoint_to_uchar('\\', buf+buf_len); + *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */ /* break is missing *intentionally* */ default: buf_len += zend_codepoint_to_uchar(ch, buf+buf_len); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard php_string.h string.c
rolland Wed Sep 28 05:22:14 2005 EDT Modified files: /php-src/ext/standard php_string.h string.c Log: - Unicode impl of {add,strip}slashes() http://cvs.php.net/diff.php/php-src/ext/standard/php_string.h?r1=1.89r2=1.90ty=u Index: php-src/ext/standard/php_string.h diff -u php-src/ext/standard/php_string.h:1.89 php-src/ext/standard/php_string.h:1.90 --- php-src/ext/standard/php_string.h:1.89 Tue Aug 16 02:02:55 2005 +++ php-src/ext/standard/php_string.h Wed Sep 28 05:22:08 2005 @@ -17,7 +17,7 @@ +--+ */ -/* $Id: php_string.h,v 1.89 2005/08/16 06:02:55 rolland Exp $ */ +/* $Id: php_string.h,v 1.90 2005/09/28 09:22:08 rolland Exp $ */ /* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */ @@ -120,10 +120,13 @@ PHPAPI UChar *php_u_strtoupper(UChar **s, int32_t *len, const char *locale); PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char *locale); PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trlen); +PHPAPI UChar *php_u_addslashes(UChar *str, int32_t length, int32_t *new_length, int freeit TSRMLS_DC); +PHPAPI UChar *php_u_addslashes_ex(UChar *str, int32_t length, int32_t *new_length, int freeit, int ignore_sybase TSRMLS_DC); PHPAPI char *php_addslashes(char *str, int length, int *new_length, int freeit TSRMLS_DC); PHPAPI char *php_addslashes_ex(char *str, int length, int *new_length, int freeit, int ignore_sybase TSRMLS_DC); PHPAPI char *php_addcslashes(char *str, int length, int *new_length, int freeit, char *what, int wlength TSRMLS_DC); PHPAPI void php_stripslashes(char *str, int *len TSRMLS_DC); +PHPAPI void php_u_stripslashes(UChar *str, int32_t *len TSRMLS_DC); PHPAPI void php_stripcslashes(char *str, int *len); PHPAPI void php_basename(char *s, size_t len, char *suffix, size_t sufflen, char **p_ret, size_t *p_len TSRMLS_DC); PHPAPI size_t php_dirname(char *str, size_t len); http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.483r2=1.484ty=u Index: php-src/ext/standard/string.c diff -u 
php-src/ext/standard/string.c:1.483 php-src/ext/standard/string.c:1.484 --- php-src/ext/standard/string.c:1.483 Mon Sep 26 19:08:10 2005 +++ php-src/ext/standard/string.c Wed Sep 28 05:22:08 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.483 2005/09/26 23:08:10 iliaa Exp $ */ +/* $Id: string.c,v 1.484 2005/09/28 09:22:08 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3427,6 +3427,66 @@ } /* }}} */ +/* {{{ php_u_stripslashes + * + * be careful, this edits the string in-place */ +PHPAPI void php_u_stripslashes(UChar *str, int32_t *len TSRMLS_DC) +{ + int32_t tmp_len = 0, i = 0; + UChar32 ch1, ch2; + + ch1 = -1; ch2 = -1; + if (PG(magic_quotes_sybase)) { + while (i *len) { + U16_NEXT(str, i, *len, ch1); + if (ch1 == '\'') { + tmp_len += zend_codepoint_to_uchar(ch1, str+tmp_len); + if (i *len) { + U16_NEXT(str, i, *len, ch2); + if (ch2 != '\'') { + tmp_len += zend_codepoint_to_uchar(ch2, str+tmp_len); + } + } + } else if (ch1 == '\\') { + if (i *len) { + U16_NEXT(str, i, *len, ch2); + if (ch2 == '0') { + tmp_len += zend_codepoint_to_uchar('\0', str+tmp_len); + } else { + tmp_len += zend_codepoint_to_uchar(ch1, str+tmp_len); + tmp_len += zend_codepoint_to_uchar(ch2, str+tmp_len); + } + } else { + tmp_len += zend_codepoint_to_uchar(ch1, str+tmp_len); + } + } else { + tmp_len += zend_codepoint_to_uchar(ch1, str+tmp_len); + } + } + } else { + while (i *len) { + U16_NEXT(str, i, *len, ch1); + if (ch1 == '\\') { + if (i *len) { + U16_NEXT(str, i, *len, ch2); + if (ch2 == '0') { + tmp_len += zend_codepoint_to_uchar('\0', str+tmp_len); + } else { +
[PHP-CVS] cvs: php-src / unicode-progress.txt
rolland Wed Sep 28 08:25:35 2005 EDT Modified files: /php-src unicode-progress.txt Log: {add,strip}slashes() http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.8&r2=1.9&ty=u Index: php-src/unicode-progress.txt diff -u php-src/unicode-progress.txt:1.8 php-src/unicode-progress.txt:1.9 --- php-src/unicode-progress.txt:1.8 Fri Sep 9 05:54:16 2005 +++ php-src/unicode-progress.txt Wed Sep 28 08:25:34 2005 @@ -6,6 +6,7 @@ Status: In Progress Completed: +addslashes() bin2hex() chr() explode() @@ -14,6 +15,7 @@ range() str_pad() str_repeat() +stripslashes() strpos() strrev() strstr() -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Sep 28 10:58:37 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Updated substr_replace() to use: - zend_get_unified_string_type()/convert_to_explicit_type() for type conversion - u_countChar32() for counting codepoints http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.484r2=1.485ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.484 php-src/ext/standard/string.c:1.485 --- php-src/ext/standard/string.c:1.484 Wed Sep 28 05:22:08 2005 +++ php-src/ext/standard/string.c Wed Sep 28 10:58:31 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.484 2005/09/28 09:22:08 rolland Exp $ */ +/* $Id: string.c,v 1.485 2005/09/28 14:58:31 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2553,44 +2553,14 @@ /* }}} */ -/* {{{ php_unify_string_types - */ -PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC) -{ - if (p == NULL || q == NULL) { - return; - } - - if (Z_TYPE_PP(p) == IS_UNICODE) { - if (Z_TYPE_PP(q) == IS_BINARY) { - convert_to_binary_ex(p); - } else { - convert_to_unicode_ex(q); - } - } else if (Z_TYPE_PP(p) == IS_BINARY) { - convert_to_binary_ex(q); - } else { - if (Z_TYPE_PP(q) == IS_BINARY) { - convert_to_binary_ex(p); - } else { - convert_to_string_ex(q); - } - } -} -/* {{{ */ - /* {{{ php_adjust_limits */ PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l) { - int32_t i, str_codepts; + int32_t str_codepts; if (Z_TYPE_PP(str) == IS_UNICODE) { - i = 0; str_codepts = 0; - while (i Z_USTRLEN_PP(str)) { - U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str)); - str_codepts++; - } + str_codepts = u_countChar32(Z_USTRVAL_PP(str), Z_USTRLEN_PP(str)); } else { str_codepts = Z_STRLEN_PP(str); } @@ -2688,7 +2658,7 @@ HashPosition pos_str, pos_from, pos_repl, pos_len; zval **tmp_str = NULL, **tmp_from = NULL, **tmp_repl = NULL, **tmp_len= NULL; - + zend_uchar str_type; if (argc 3 || argc 4 || zend_get_parameters_ex(argc, str, repl, from, len) == FAILURE) { 
WRONG_PARAM_COUNT; @@ -2747,8 +2717,16 @@ tmp_repl = repl; } - if (tmp_repl Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) - php_unify_string_types(str, tmp_repl TSRMLS_CC); + if (tmp_repl Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) { + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(str), Z_TYPE_PP(tmp_repl)); + if (str_type == (zend_uchar)-1) { + convert_to_explicit_type(str, IS_BINARY); + convert_to_explicit_type(tmp_repl, IS_BINARY); + } else { + convert_to_explicit_type(str, str_type); + convert_to_explicit_type(tmp_repl, str_type); + } + } php_adjust_limits(str, f, l); result_len = php_do_substr_replace(result, str, tmp_repl, f, l TSRMLS_CC); @@ -2820,8 +2798,16 @@ tmp_repl = repl; } - if (tmp_repl Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl)) - php_unify_string_types(tmp_str, tmp_repl TSRMLS_CC); + if (tmp_repl Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl)) { + str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(tmp_str), Z_TYPE_PP(tmp_repl)); + if (str_type == (zend_uchar)-1) { + convert_to_explicit_type(tmp_str, IS_BINARY); + convert_to_explicit_type(tmp_repl, IS_BINARY); + } else { + convert_to_explicit_type(tmp_str, str_type); + convert_to_explicit_type(tmp_repl, str_type); + } + } php_adjust_limits(tmp_str, f, l); result_len = php_do_substr_replace(result, tmp_str, tmp_repl, f, l TSRMLS_CC); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Fri Sep 9 15:07:19 2005 EDT Modified files: /php-src/ext/standard string.c Log: - str_pad(): Use u_countChar32() for codepoint counting, eumalloc/eurealloc() for Unicode mallocs. http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.481&r2=1.482&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.481 php-src/ext/standard/string.c:1.482 --- php-src/ext/standard/string.c:1.481 Thu Sep 8 10:07:40 2005 +++ php-src/ext/standard/string.c Fri Sep 9 15:07:18 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.481 2005/09/08 14:07:40 derick Exp $ */ +/* $Id: string.c,v 1.482 2005/09/09 19:07:18 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -5394,10 +5394,7 @@ if (input_type == IS_UNICODE) { /* For Unicode, num_pad_chars/pad_length is number of codepoints */ i = 0; input_codepts = 0; - while (i < input_len) { - U16_FWD_1((UChar *)input, i, input_len); - input_codepts++; - } + input_codepts = u_countChar32((UChar *)input, input_len); num_pad_chars = pad_length - input_codepts; } else { num_pad_chars = pad_length - input_len; @@ -5439,7 +5436,7 @@ } if (input_type == IS_UNICODE) { - result = emalloc(UBYTES(input_len + num_pad_chars*2 + 1)); + result = eumalloc(input_len + num_pad_chars*2 + 1); } else { result = emalloc(input_len + num_pad_chars + 1); } @@ -5481,7 +5478,7 @@ result_len += zend_codepoint_to_uchar(ch, (UChar *)result + result_len); } *((UChar *)result + result_len) = 0; - result = erealloc(result, UBYTES(result_len+1)); + result = eurealloc(result, result_len+1); } else { for (i = 0; i < left_pad; i++) *((char *)result + result_len++) = *((char *)padstr + (i % padstr_len)); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Thu Sep 8 01:05:38 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of substr_replace() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.478r2=1.479ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.478 php-src/ext/standard/string.c:1.479 --- php-src/ext/standard/string.c:1.478 Wed Sep 7 03:00:03 2005 +++ php-src/ext/standard/string.c Thu Sep 8 01:05:36 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.478 2005/09/07 07:00:03 dmitry Exp $ */ +/* $Id: string.c,v 1.479 2005/09/08 05:05:36 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2553,6 +2553,118 @@ /* }}} */ +/* {{{ php_unify_string_types + */ +PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC) +{ + if (p == NULL || q == NULL) { + return; + } + + if (Z_TYPE_PP(p) == IS_UNICODE) { + if (Z_TYPE_PP(q) == IS_BINARY) { + convert_to_binary_ex(p); + } else { + convert_to_unicode_ex(q); + } + } else if (Z_TYPE_PP(p) == IS_BINARY) { + convert_to_binary_ex(q); + } else { + if (Z_TYPE_PP(q) == IS_BINARY) { + convert_to_binary_ex(p); + } else { + convert_to_string_ex(q); + } + } +} +/* {{{ */ + +/* {{{ php_adjust_limits + */ +PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l) +{ + int32_t i, str_codepts; + + if (Z_TYPE_PP(str) == IS_UNICODE) { + i = 0; str_codepts = 0; + while (i Z_USTRLEN_PP(str)) { + U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str)); + str_codepts++; + } + } else { + str_codepts = Z_STRLEN_PP(str); + } + + /* If from position is negative, count start position from the end +* of the string */ + if (*f 0) { + *f = str_codepts + *f; + if (*f 0) { + *f = 0; + } + } else if (*f str_codepts) { + *f = str_codepts; + } + /* If length position is negative, set it to the length +* needed to stop that many codepts/chars from the end of the string */ + if (*l 0) { + *l = str_codepts - *f + *l; + if (*l 0) { + *l = 0; + } + } + if (((unsigned)(*f) + (unsigned)(*l)) 
str_codepts) { + *l = str_codepts - *f; + } +} +/* }}} */ + +/* {{{ php_do_substr_replace + */ +PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC) +{ + void *buf; + int32_t buf_len, idx; + UChar ch; + + if (Z_TYPE_PP(str) == IS_UNICODE) { + buf = emalloc(UBYTES(Z_USTRLEN_PP(str) -l + Z_USTRLEN_PP(repl) + 1)); + + /* buf_len is codept count here */ + buf_len = 0; idx = 0; + while (f-- 0) { + U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch); + buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + buf_len); + } + if (repl != NULL) { + u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), Z_USTRLEN_PP(repl)); + buf_len += Z_USTRLEN_PP(repl); + } + U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l); + u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, Z_USTRLEN_PP(str) - idx); + buf_len += (Z_USTRLEN_PP(str) - idx); + + *((UChar *)buf + buf_len) = 0; + buf = erealloc(buf, UBYTES(buf_len + 1)); + } else { + /* buf_len is char count here */ + buf_len = Z_STRLEN_PP(str) - l + Z_STRLEN_PP(repl); + buf = emalloc(buf_len + 1); + + memcpy(buf, Z_STRVAL_PP(str), f); + if (repl != NULL ) { + memcpy((char *)buf + f, Z_STRVAL_PP(repl), Z_STRLEN_PP(repl)); + } + memcpy((char *)buf + f + Z_STRLEN_PP(repl), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l); + + *((char *)buf + buf_len) = '\0'; + } + + *result = buf; + return buf_len; +} +/* }}} */ + /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length]) Replaces part of a string with another string */ PHP_FUNCTION(substr_replace) @@ -2561,10 +2673,10 @@ zval **from; zval **len = NULL; zval **repl; - char *result; - int result_len; - int l = 0; - int f; + void *result; + int32_t result_len; +
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Sep 5 06:55:38 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of str_pad() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.475r2=1.476ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.475 php-src/ext/standard/string.c:1.476 --- php-src/ext/standard/string.c:1.475 Mon Aug 29 02:51:14 2005 +++ php-src/ext/standard/string.c Mon Sep 5 06:55:35 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.475 2005/08/29 06:51:14 dmitry Exp $ */ +/* $Id: string.c,v 1.476 2005/09/05 10:55:35 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -5301,93 +5301,142 @@ PHP_FUNCTION(str_pad) { /* Input arguments */ - zval **input, /* Input string */ -**pad_length, /* Length to pad to */ -**pad_string, /* Padding string */ -**pad_type;/* Padding type (left/right/both) */ + void *input; /* Input string */ + int32_t pad_length; /* Length to pad to, in codepoints for Unicode */ + void *padstr; /* Padding string */ + int32_t pad_type; /* Padding type (left/right/both) */ + int32_t input_len, padstr_len; /* Lengths in code units for Unicode */ + zend_uchar input_type, padstr_type; /* Helper variables */ - intnum_pad_chars; /* Number of padding characters (total - input size) */ - char *result = NULL; /* Resulting string */ - intresult_len = 0; /* Length of the resulting string */ - char *pad_str_val = ; /* Pointer to padding string */ - intpad_str_len = 1; /* Length of the padding string */ - intpad_type_val = STR_PAD_RIGHT; /* The padding type value */ - inti, left_pad=0, right_pad=0; + int32_t input_codepts; /* Number of codepts in Unicode input */ + int32_t num_pad_chars; /* Number of padding characters (total - input size) */ + void *result = NULL; /* Resulting string */ + int32_t result_len = 0; /* Length of the resulting string */ + int32_t i, j, left_pad=0, right_pad=0; + UChar32 ch; - if (ZEND_NUM_ARGS() 2 || ZEND_NUM_ARGS() 4 || - zend_get_parameters_ex(ZEND_NUM_ARGS(), 
input, pad_length, pad_string, pad_type) == FAILURE) { + if (ZEND_NUM_ARGS() 2 || ZEND_NUM_ARGS() 4) { WRONG_PARAM_COUNT; } + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, Tl|Tl, + input, input_len, input_type, pad_length, + padstr, padstr_len, padstr_type, pad_type) == FAILURE) { + return; + } - /* Perform initial conversion to expected data types. */ - convert_to_string_ex(input); - convert_to_long_ex(pad_length); - - num_pad_chars = Z_LVAL_PP(pad_length) - Z_STRLEN_PP(input); - + if (input_type == IS_UNICODE) { + /* For Unicode, num_pad_chars/pad_length is number of codepoints */ + i = 0; input_codepts = 0; + while (i input_len) { + U16_FWD_1((UChar *)input, i, input_len); + input_codepts++; + } + num_pad_chars = pad_length - input_codepts; + } else { + num_pad_chars = pad_length - input_len; + } /* If resulting string turns out to be shorter than input string, we simply copy the input and return. */ if (num_pad_chars 0) { - RETURN_ZVAL(*input, 1, 0); + if (input_type == IS_UNICODE) { + RETURN_UNICODEL((UChar *)input, input_len, 1); + } else if (input_type == IS_BINARY) { + RETURN_BINARYL((char *)input, input_len, 1); + } else { + RETURN_STRINGL((char *)input, input_len, 1); + } } - /* Setup the padding string values if specified. */ + /* Setup the padding string values if NOT specified. */ if (ZEND_NUM_ARGS() 2) { - convert_to_string_ex(pad_string); - if (Z_STRLEN_PP(pad_string) == 0) { + if (padstr_len == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, Padding string cannot be empty.); return; } - pad_str_val = Z_STRVAL_PP(pad_string); - pad_str_len = Z_STRLEN_PP(pad_string); - if (ZEND_NUM_ARGS() 3) { - convert_to_long_ex(pad_type); - pad_type_val =
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Mon Sep 5 12:37:46 2005 EDT Modified files: /php-src/ext/standard string.c Log: Pointer arithmetic with char * rather than void * http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.476&r2=1.477&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.476 php-src/ext/standard/string.c:1.477 --- php-src/ext/standard/string.c:1.476 Mon Sep 5 06:55:35 2005 +++ php-src/ext/standard/string.c Mon Sep 5 12:37:45 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.476 2005/09/05 10:55:35 rolland Exp $ */ +/* $Id: string.c,v 1.477 2005/09/05 16:37:45 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -5273,20 +5273,20 @@ if (haystack_type == IS_UNICODE) { while ((p = zend_u_memnstr((UChar *)p, (UChar *)needle, needle_len, (UChar *)endp)) != NULL) { /*(UChar *)p += needle_len; // GCC 4.0.0 cannot compile this */ - p += UBYTES(needle_len); + p = (UChar *)p + UBYTES(needle_len); count++; } } else { if (needle_len == 1) { cmp = ((char *)needle)[0]; - while ((p = memchr(p, cmp, endp - p))) { + while ((p = memchr(p, cmp, (char *)endp - (char *)p))) { count++; - (char *)p++; + p = (char *)p + 1; } } else { while ((p = php_memnstr((char *)p, (char *)needle, needle_len, (char *)endp))) { /*(char *)p += needle_len; // GCC 4.0.0 cannot compile this */ - p += needle_len; + p = (char *)p + needle_len; count++; } } @@ -5420,7 +5420,7 @@ } else { for (i = 0; i < left_pad; i++) *((char *)result + result_len++) = *((char *)padstr + (i % padstr_len)); - memcpy(result + result_len, input, input_len); + memcpy((char *)result + result_len, input, input_len); result_len += input_len; for (i = 0; i < right_pad; i++) *((char *)result + result_len++) = *((char *)padstr + (i % padstr_len)); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Sat Aug 27 15:14:05 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of substr_count() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.473r2=1.474ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.473 php-src/ext/standard/string.c:1.474 --- php-src/ext/standard/string.c:1.473 Fri Aug 26 06:21:07 2005 +++ php-src/ext/standard/string.c Sat Aug 27 15:14:05 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.473 2005/08/26 10:21:07 rolland Exp $ */ +/* $Id: string.c,v 1.474 2005/08/27 19:14:05 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -5202,62 +5202,91 @@ Returns the number of times a substring occurs in the string */ PHP_FUNCTION(substr_count) { - zval **haystack, **needle, **offset, **length; + void *haystack, *needle; + int32_t haystack_len, needle_len; + zend_uchar haystack_type, needle_type; + long offset = 0, length = 0; int ac = ZEND_NUM_ARGS(); int count = 0; - char *p, *endp, cmp; - - if (ac 2 || ac 4 || zend_get_parameters_ex(ac, haystack, needle, offset, length) == FAILURE) { - WRONG_PARAM_COUNT; + void *p, *endp, *tmp; + int32_t i, j; + char cmp; + + if (zend_parse_parameters(ac TSRMLS_CC, TT|ll, + haystack, haystack_len, haystack_type, + needle, needle_len, needle_type, + offset, length) == FAILURE) { + return; } - convert_to_string_ex(haystack); - convert_to_string_ex(needle); - - if (Z_STRLEN_PP(needle) == 0) { + if (needle_len == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, Empty substring.); RETURN_FALSE; } - - p = Z_STRVAL_PP(haystack); - endp = p + Z_STRLEN_PP(haystack); - + + if (haystack_type == IS_UNICODE) { + p = (UChar *)haystack; + endp = (UChar *)haystack + haystack_len; + } else { + p = (char *)haystack; + endp = (char *)haystack + haystack_len; + } + if (ac 2) { - convert_to_long_ex(offset); - if (Z_LVAL_PP(offset) 0) { + if (offset 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset should be greater then or equal to 
0.); - RETURN_FALSE; + RETURN_FALSE; + } + if (haystack_type == IS_UNICODE) { + i = 0; + U16_FWD_N((UChar *)haystack, i, haystack_len, offset); + p = (UChar *)haystack + i; + } else { + p = (char *)haystack + offset; } - p += Z_LVAL_PP(offset); if (p endp) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset value %ld exceeds string length., Z_LVAL_PP(offset)); - RETURN_FALSE; + php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset value %ld exceeds string length., offset); + RETURN_FALSE; } if (ac == 4) { - convert_to_long_ex(length); - if (Z_LVAL_PP(length) = 0) { + if (length 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, Length should be greater than 0.); - RETURN_FALSE; + RETURN_FALSE; } - if ((p + Z_LVAL_PP(length)) endp) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, Length value %ld exceeds string length., Z_LVAL_PP(length)); + if (haystack_type == IS_UNICODE) { + j = i; + i = 0; + U16_FWD_N((UChar *)p, i, haystack_len-j, length); + tmp = (UChar *)p + i; + } else { + tmp = (char *)p + length; + } + if (tmp endp) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset value %ld exceeds string length., offset); RETURN_FALSE; + } else { + endp = tmp; } - endp = p + Z_LVAL_PP(length); } } - - if (Z_STRLEN_PP(needle) == 1) { - cmp =
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Fri Aug 26 06:21:09 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Updated strrev() to handle base+combining sequences http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.472&r2=1.473&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.472 php-src/ext/standard/string.c:1.473 --- php-src/ext/standard/string.c:1.472 Tue Aug 23 08:53:28 2005 +++ php-src/ext/standard/string.c Fri Aug 26 06:21:07 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.472 2005/08/23 12:53:28 dmitry Exp $ */ +/* $Id: string.c,v 1.473 2005/08/26 10:21:07 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3222,7 +3222,9 @@ { zval **str; char *s, *e, *n, *p; - UChar *u_s, *u_e, *u_n, *u_p; + int32_t i, x1, x2; + UChar32 ch; + UChar *u_s, *u_n, *u_p; if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, str) == FAILURE) { WRONG_PARAM_COUNT; @@ -3236,15 +3238,22 @@ u_n = eumalloc(Z_USTRLEN_PP(str)+1); u_p = u_n; u_s = Z_USTRVAL_PP(str); - u_e = u_s + Z_USTRLEN_PP(str) - 1; - while (u_e >= u_s) { - if (U16_IS_TRAIL(*u_e)) { - *u_p = *(u_e-1); - *(u_p+1) = *u_e; - u_e -= 2; u_p += 2; + i = Z_USTRLEN_PP(str); + while (i > 0) { + U16_PREV(u_s, 0, i, ch); + if (u_getCombiningClass(ch) == 0) { + u_p += zend_codepoint_to_uchar(ch, u_p); } else { - *u_p++ = *u_e--; + x2 = i; + do { + U16_PREV(u_s, 0, i, ch); + } while (u_getCombiningClass(ch) != 0); + x1 = i; + while (x1 <= x2) { + U16_NEXT(u_s, x1, Z_USTRLEN_PP(str), ch); + u_p += zend_codepoint_to_uchar(ch, u_p); + } } } *u_p = 0; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Fri Aug 19 06:59:21 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode capable impl() of strrev() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.466r2=1.467ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.466 php-src/ext/standard/string.c:1.467 --- php-src/ext/standard/string.c:1.466 Thu Aug 18 18:37:22 2005 +++ php-src/ext/standard/string.c Fri Aug 19 06:59:19 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.466 2005/08/18 22:37:22 andrei Exp $ */ +/* $Id: string.c,v 1.467 2005/08/19 10:59:19 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3212,25 +3212,51 @@ { zval **str; char *s, *e, *n, *p; + UChar *u_s, *u_e, *u_n, *u_p; if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, str) == FAILURE) { WRONG_PARAM_COUNT; } - convert_to_string_ex(str); - - n = emalloc(Z_STRLEN_PP(str)+1); - p = n; - - s = Z_STRVAL_PP(str); - e = s + Z_STRLEN_PP(str); - - while (--e=s) { - *p++ = *e; + + if (Z_TYPE_PP(str) != IS_UNICODE Z_TYPE_PP(str) != IS_BINARY Z_TYPE_PP(str) != IS_STRING) { + convert_to_text_ex(str); + } + + if (Z_TYPE_PP(str) == IS_UNICODE) { + u_n = eumalloc(Z_USTRLEN_PP(str)+1); + u_p = u_n; + u_s = Z_USTRVAL_PP(str); + u_e = u_s + Z_USTRLEN_PP(str) - 1; + + while (u_e = u_s) { + if (U16_IS_TRAIL(*u_e)) { + *u_p = *(u_e-1); + *(u_p+1) = *u_e; + u_e -= 2; u_p += 2; + } else { + *u_p++ = *u_e--; + } + } + *u_p = 0; + } else { + n = emalloc(Z_STRLEN_PP(str)+1); + p = n; + s = Z_STRVAL_PP(str); + e = s + Z_STRLEN_PP(str); + + while (--e = s) { + *(p++) = *e; + } + *p = '\0'; } - *p = '\0'; - - RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0); + if (Z_TYPE_PP(str) == IS_UNICODE) { + RETVAL_UNICODEL(u_n, Z_USTRLEN_PP(str), 0); + } else if (Z_TYPE_PP(str) == IS_BINARY) { + RETVAL_BINARYL(n, Z_BINLEN_PP(str), 0); + } else { + RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0); + } } /* }}} */ -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: 
http://www.php.net/unsub.php
[PHP-CVS] Not getting mails from list
Hello, Last week I requested subscription to the following lists via http://www.php.net/mailing-lists.php: php-internals, php-i18n, php-announce, zend-engine-cvs. I'm not receiving any mail from these lists. I've already sent the confirmation replies, twice. Do I have to do anything else? I know this is not the right list to raise this, but I didn't know of any other address to contact. Thanks, Rolland -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
RE: [PHP-CVS] cvs: php-src /ext/standard string.c
--- Dmitry Stogov [EMAIL PROTECTED] wrote: Hi Roland, Please be more careful with your patches. Compile PHP with --enable-debug and run make test and make utest before and after the patch to see that you don't break something. Now I fixed all (I hope) implode() bugs that you introduced. Dmitry. Thanks a lot for the tips, that really helped. One question: when/how is the memory allocated by SEPARATE_ZVAL() freed? Should I be calling any function to do this? -- Rolland -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Aug 17 03:14:14 2005 EDT Modified files: /php-src/ext/standard string.c Log: Updated implode() impl as per Andrei's comments [http://news.php.net/php.cvs/33457] http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.457r2=1.458ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.457 php-src/ext/standard/string.c:1.458 --- php-src/ext/standard/string.c:1.457 Tue Aug 16 16:22:33 2005 +++ php-src/ext/standard/string.c Wed Aug 17 03:14:12 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.457 2005/08/16 20:22:33 helly Exp $ */ +/* $Id: string.c,v 1.458 2005/08/17 07:14:12 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1144,14 +1144,11 @@ PHPAPI void php_implode(zval *delim, zval *arr, zval *retval) { zend_uchar return_type; - int numelems, i; + int numelems, i=0; HashPositionpos; zval**tmp; TSRMLS_FETCH(); - if (Z_TYPE_P(delim) != IS_UNICODE Z_TYPE_P(delim) != IS_BINARY) { - convert_to_string_ex(delim); - } Z_TYPE_P(retval) = return_type = Z_TYPE_P(delim); /* ... to start off */ /* Setup return value */ @@ -1169,18 +1166,14 @@ } zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), pos); - for (i = 1 ; i = numelems ; i++) { - if (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **)tmp, pos) != SUCCESS) { - /* Shouldn't happen ? 
*/ - return; - } - zend_hash_move_forward_ex(Z_ARRVAL_P(arr), pos); + while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **)tmp, pos) == SUCCESS) { if (Z_TYPE_PP(tmp) != return_type) { /* Convert to common type, if possible */ if (return_type == IS_UNICODE) { if (Z_TYPE_PP(tmp) == IS_BINARY) { /* ERROR */ - php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed string types); + php_error_docref(NULL TSRMLS_CC, E_WARNING, + Cannot mix binary strings with other string types); efree(Z_USTRVAL_P(retval)); ZVAL_FALSE(retval); return; @@ -1191,7 +1184,8 @@ } else if (return_type == IS_BINARY) { if (Z_TYPE_PP(tmp) == IS_UNICODE || Z_TYPE_PP(tmp) == IS_STRING) { /* ERROR */ - php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed string types); + php_error_docref(NULL TSRMLS_CC, E_WARNING, + Cannot mix binary strings with other string types); efree(Z_BINVAL_P(retval)); ZVAL_FALSE(retval); return; @@ -1207,7 +1201,8 @@ Z_TYPE_P(retval) = return_type = IS_UNICODE; } else if (Z_TYPE_PP(tmp) == IS_BINARY) { /* ERROR */ - php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed string types); + php_error_docref(NULL TSRMLS_CC, E_WARNING, + Cannot mix binary strings with other string types); efree(Z_STRVAL_P(retval)); ZVAL_FALSE(retval); return; @@ -1222,30 +1217,30 @@ if (return_type == IS_UNICODE) { Z_USTRVAL_P(retval) = eurealloc(Z_USTRVAL_P(retval), Z_USTRLEN_P(retval)+Z_USTRLEN_PP(tmp)); - memcpy(Z_USTRVAL_P(retval)+Z_USTRLEN_P(retval), - Z_USTRVAL_PP(tmp), Z_USTRLEN_PP(tmp)*sizeof(UChar)); + memcpy(Z_USTRVAL_P(retval)+Z_USTRLEN_P(retval), Z_USTRVAL_PP(tmp), + UBYTES(Z_USTRLEN_PP(tmp))); Z_USTRLEN_P(retval) += Z_USTRLEN_PP(tmp); - if (i numelems) { /* Append delim */ + if (++i numelems) { /* Append delim */
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Aug 17 06:26:03 2005 EDT Modified files: /php-src/ext/standard string.c Log: php_u_trim_range(): Alloc UChar32 units rather than UChar http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.459&r2=1.460&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.459 php-src/ext/standard/string.c:1.460 --- php-src/ext/standard/string.c:1.459 Wed Aug 17 03:59:28 2005 +++ php-src/ext/standard/string.c Wed Aug 17 06:26:02 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.459 2005/08/17 07:59:28 dmitry Exp $ */ +/* $Id: string.c,v 1.460 2005/08/17 10:26:02 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -610,7 +610,7 @@ c = input[0]; if ( (input+3 < end) && input[1] == '.' && input[2] == '.' && input[3] >= c ) { tmp_len += (input[3] - c + 1); - tmp = (UChar32 *)erealloc(tmp, tmp_len*sizeof(UChar)); + tmp = (UChar32 *)erealloc(tmp, tmp_len*sizeof(UChar32)); for ( ; c <= input[3] ; c++ ) { if ( U_IS_UNICODE_CHAR(c) ) tmp[idx++] = c; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Aug 17 13:33:21 2005 EDT Modified files: /php-src/ext/standard string.c Log: Unicode impl of ucfirst() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.461r2=1.462ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.461 php-src/ext/standard/string.c:1.462 --- php-src/ext/standard/string.c:1.461 Wed Aug 17 07:36:30 2005 +++ php-src/ext/standard/string.c Wed Aug 17 13:33:19 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.461 2005/08/17 11:36:30 dmitry Exp $ */ +/* $Id: string.c,v 1.462 2005/08/17 17:33:19 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2866,6 +2866,39 @@ } /* }}} */ +/* {{{ proto php_u_ucfirst + Makes an Unicode string's first character uppercase */ +static void php_u_ucfirst(zval *ustr, zval *return_value) +{ + UChar32 lc, uc; + UChar tmp[2] = {0, 0}; /* UChar32 will be converted to upto 2 UChar units ? */ + int32_t tmp_len = 2; + int32_t pos = 0; + UErrorCode err = U_ZERO_ERROR; + + U16_NEXT(Z_USTRVAL_P(ustr), pos, Z_USTRLEN_P(ustr), lc); + uc = u_toupper(lc); + if ( uc == lc ) { + ZVAL_UNICODEL(return_value, Z_USTRVAL_P(ustr), Z_USTRLEN_P(ustr), 1); + return; + } + + u_strFromUTF32(tmp, tmp_len, tmp_len, uc, 1, err); + if (U_FAILURE(err)) { + ZVAL_EMPTY_UNICODE(return_value); + return; + } + + Z_USTRVAL_P(return_value) = eumalloc(tmp_len+Z_USTRLEN_P(ustr)-pos+1); + Z_USTRVAL_P(return_value)[0] = tmp[0]; + if (tmp_len 1) { + Z_USTRVAL_P(return_value)[1] = tmp[1]; + } + memcpy(Z_USTRVAL_P(return_value)+tmp_len, Z_USTRVAL_P(ustr)+pos, UBYTES(Z_USTRLEN_P(ustr)-pos+1)); + Z_USTRLEN_P(return_value) = tmp_len+Z_USTRLEN_P(ustr)-pos; +} +/* }}} */ + /* {{{ proto string ucfirst(string str) Makes a string's first character uppercase */ PHP_FUNCTION(ucfirst) @@ -2875,14 +2908,29 @@ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, str) == FAILURE) { WRONG_PARAM_COUNT; } - convert_to_string_ex(str); - if (!Z_STRLEN_PP(str)) { + if (Z_TYPE_PP(str) != IS_UNICODE 
Z_TYPE_PP(str) != IS_BINARY Z_TYPE_PP(str) != IS_STRING) { + convert_to_text_ex(str); + } + + if (Z_TYPE_PP(str) == IS_UNICODE !Z_USTRLEN_PP(str)) { + RETURN_EMPTY_UNICODE(); + } else if (Z_TYPE_PP(str) == IS_BINARY !Z_BINLEN_PP(str)) { + RETURN_EMPTY_BINARY(); + } else if (!Z_STRLEN_PP(str)) { RETURN_EMPTY_STRING(); } - ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); - *Z_STRVAL_P(return_value) = toupper((unsigned char) *Z_STRVAL_P(return_value)); + if (Z_TYPE_PP(str) == IS_UNICODE) { + Z_TYPE_P(return_value) = IS_UNICODE; + php_u_ucfirst(*str, return_value); + } else if (Z_TYPE_PP(str) == IS_BINARY) { + ZVAL_BINARYL(return_value, Z_BINVAL_PP(str), Z_BINLEN_PP(str), 1); + *Z_BINVAL_P(return_value) = toupper((unsigned char) *Z_BINVAL_P(return_value)); + } else { + ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + *Z_STRVAL_P(return_value) = toupper((unsigned char) *Z_STRVAL_P(return_value)); + } } /* }}} */ -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Wed Aug 17 16:29:02 2005 EDT Modified files: /php-src/ext/standard string.c Log: Unicode-capable impl of ucwords() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.463r2=1.464ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.463 php-src/ext/standard/string.c:1.464 --- php-src/ext/standard/string.c:1.463 Wed Aug 17 13:39:04 2005 +++ php-src/ext/standard/string.c Wed Aug 17 16:29:00 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.463 2005/08/17 17:39:04 andrei Exp $ */ +/* $Id: string.c,v 1.464 2005/08/17 20:29:00 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2940,6 +2940,53 @@ } /* }}} */ +/* {{{ proto php_u_ucwords() + Uppercase the first character of every word in an Unicode string */ +static void php_u_ucwords(zval *ustr, zval *retval) +{ + UChar32 *codepts; + int32_t len, retval_len; + int32_t i; + UErrorCode err; + + len = Z_USTRLEN_P(ustr); + codepts = (UChar32 *)emalloc((len+1)*sizeof(UChar32)); + err = U_ZERO_ERROR; + u_strToUTF32(codepts, len+1, len, Z_USTRVAL_P(ustr), len, err); + if (U_FAILURE(err)) { + efree(codepts); + ZVAL_EMPTY_UNICODE(retval); + return; + } + + codepts[0] = u_toupper(codepts[0]); + for (i = 1; i len ; i++) { + if (u_isWhitespace(codepts[i-1]) == TRUE) { + codepts[i] = u_toupper(codepts[i]); + } + } + + retval_len = len; + Z_USTRVAL_P(retval) = eumalloc(retval_len+1); + err = U_ZERO_ERROR; + u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, retval_len, codepts, len, err); + if (U_FAILURE(err) == U_BUFFER_OVERFLOW_ERROR) { + err = U_ZERO_ERROR; + Z_USTRVAL_P(retval) = eurealloc(Z_USTRVAL_P(retval), retval_len+1); + u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, NULL, codepts, len, err); + } + + if (U_SUCCESS(err)) { + Z_USTRLEN_P(retval) = retval_len; + } else { + efree(Z_USTRVAL_P(retval)); + ZVAL_EMPTY_UNICODE(retval); + } + + efree(codepts); +} +/* }}} */ + /* {{{ proto string ucwords(string str) Uppercase the first character of every word 
in a string */ PHP_FUNCTION(ucwords) @@ -2950,19 +2997,35 @@ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, str) == FAILURE) { WRONG_PARAM_COUNT; } - convert_to_string_ex(str); - if (!Z_STRLEN_PP(str)) { + if (Z_TYPE_PP(str) != IS_UNICODE Z_TYPE_PP(str) != IS_BINARY Z_TYPE_PP(str) != IS_STRING) { + convert_to_text_ex(str); + } + + if (Z_TYPE_PP(str) == IS_UNICODE !Z_USTRLEN_PP(str)) { + RETURN_EMPTY_UNICODE(); + } else if (Z_TYPE_PP(str) == IS_BINARY !Z_BINLEN_PP(str)) { + RETURN_EMPTY_BINARY(); + } else if (!Z_STRLEN_PP(str)) { RETURN_EMPTY_STRING(); } - ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); - r = Z_STRVAL_P(return_value); + if (Z_TYPE_PP(str) == IS_UNICODE) { + Z_TYPE_P(return_value) = IS_UNICODE; + php_u_ucwords(*str, return_value); + } else { + if (Z_TYPE_PP(str) == IS_BINARY) { + ZVAL_BINARYL(return_value, Z_BINVAL_PP(str), Z_BINLEN_PP(str), 1); + } else { + ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + } - *r = toupper((unsigned char) *r); - for (r_end = r + Z_STRLEN_P(return_value) - 1; r r_end; ) { - if (isspace((int) *(unsigned char *)r++)) { - *r = toupper((unsigned char) *r); + r = Z_STRVAL_P(return_value); + *r = toupper((unsigned char) *r); + for (r_end = r + Z_STRLEN_P(return_value) - 1; r r_end; ) { + if (isspace((int) *(unsigned char *)r++)) { + *r = toupper((unsigned char) *r); + } } } } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Thu Aug 18 01:53:48 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Removed 'proto' from description of internal funcns: ucfirst()/ucwords() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.464r2=1.465ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.464 php-src/ext/standard/string.c:1.465 --- php-src/ext/standard/string.c:1.464 Wed Aug 17 16:29:00 2005 +++ php-src/ext/standard/string.c Thu Aug 18 01:53:46 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.464 2005/08/17 20:29:00 rolland Exp $ */ +/* $Id: string.c,v 1.465 2005/08/18 05:53:46 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2872,7 +2872,7 @@ } /* }}} */ -/* {{{ proto php_u_ucfirst +/* {{{ php_u_ucfirst() Makes an Unicode string's first character uppercase */ static void php_u_ucfirst(zval *ustr, zval *return_value) { @@ -2940,7 +2940,7 @@ } /* }}} */ -/* {{{ proto php_u_ucwords() +/* {{{ php_u_ucwords() Uppercase the first character of every word in an Unicode string */ static void php_u_ucwords(zval *ustr, zval *retval) { -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard php_string.h string.c
rolland Tue Aug 16 02:02:57 2005 EDT Modified files: /php-src/ext/standard php_string.h string.c Log: php_trim() takes extra arg to determine string type to be returned http://cvs.php.net/diff.php/php-src/ext/standard/php_string.h?r1=1.88r2=1.89ty=u Index: php-src/ext/standard/php_string.h diff -u php-src/ext/standard/php_string.h:1.88 php-src/ext/standard/php_string.h:1.89 --- php-src/ext/standard/php_string.h:1.88 Thu Aug 11 19:35:59 2005 +++ php-src/ext/standard/php_string.h Tue Aug 16 02:02:55 2005 @@ -17,7 +17,7 @@ +--+ */ -/* $Id: php_string.h,v 1.88 2005/08/11 23:35:59 andrei Exp $ */ +/* $Id: php_string.h,v 1.89 2005/08/16 06:02:55 rolland Exp $ */ /* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */ @@ -132,7 +132,7 @@ int needle_len, char *str, int str_len, int *_new_length, int case_sensitivity, int *replace_count); PHPAPI char *php_str_to_str(char *haystack, int length, char *needle, int needle_len, char *str, int str_len, int *_new_length); -PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zval *return_value, int mode TSRMLS_DC); +PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zend_uchar str_type, zval *return_value, int mode TSRMLS_DC); PHPAPI size_t php_strip_tags(char *rbuf, int len, int *state, char *allow, int allow_len); PHPAPI int php_char_to_str_ex(char *str, uint len, char from, char *to, int to_len, pval *result, int case_sensitivity, int *replace_count); PHPAPI int php_char_to_str(char *str, uint len, char from, char *to, int to_len, pval *result); http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.453r2=1.454ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.453 php-src/ext/standard/string.c:1.454 --- php-src/ext/standard/string.c:1.453 Mon Aug 15 15:12:59 2005 +++ php-src/ext/standard/string.c Tue Aug 16 02:02:56 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.453 2005/08/15 19:12:59 johannes Exp $ */ +/* $Id: string.c,v 1.454 2005/08/16 06:02:56 
rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -538,7 +538,7 @@ * mode 3 : trim left and right * what indicates which chars are to be trimmed. NULL-default (' \t\n\r\v\0') */ -PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zval *return_value, int mode TSRMLS_DC) +PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zend_uchar str_type, zval *return_value, int mode TSRMLS_DC) { register int i; int trimmed = 0; @@ -572,7 +572,11 @@ } if (return_value) { - RETVAL_STRINGL(c, len, 1); + if ( str_type == IS_BINARY ) { + RETVAL_BINARYL(c, len, 1); + } else { + RETVAL_STRINGL(c, len, 1); + } } else { return estrndup(c, len); } @@ -750,13 +754,13 @@ if ( str_type == IS_UNICODE ) { php_u_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC); } else { - php_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC); + php_trim(str, str_len, what, what_len, str_type, return_value, mode TSRMLS_CC); } } else { if ( str_type == IS_UNICODE ) { php_u_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC); } else { - php_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC); + php_trim(str, str_len, NULL, 0, str_type, return_value, mode TSRMLS_CC); } } } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/simplexml simplexml.c /ext/standard http_fopen_wrapper.c
rolland Tue Aug 16 02:05:00 2005 EDT Modified files: /php-src/ext/simplexml simplexml.c /php-src/ext/standard http_fopen_wrapper.c Log: Updated calls to php_trim() to provide arg for returned string type http://cvs.php.net/diff.php/php-src/ext/simplexml/simplexml.c?r1=1.154r2=1.155ty=u Index: php-src/ext/simplexml/simplexml.c diff -u php-src/ext/simplexml/simplexml.c:1.154 php-src/ext/simplexml/simplexml.c:1.155 --- php-src/ext/simplexml/simplexml.c:1.154 Fri Aug 12 10:08:25 2005 +++ php-src/ext/simplexml/simplexml.c Tue Aug 16 02:04:58 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: simplexml.c,v 1.154 2005/08/12 14:08:25 sebastian Exp $ */ +/* $Id: simplexml.c,v 1.155 2005/08/16 06:04:58 rolland Exp $ */ #ifdef HAVE_CONFIG_H #include config.h @@ -337,7 +337,7 @@ trim_zv = *member; zval_copy_ctor(trim_zv); convert_to_string(trim_zv); - php_trim(Z_STRVAL(trim_zv), Z_STRLEN(trim_zv), NULL, 0, tmp_zv, 3 TSRMLS_CC); + php_trim(Z_STRVAL(trim_zv), Z_STRLEN(trim_zv), NULL, 0, IS_STRING, tmp_zv, 3 TSRMLS_CC); zval_dtor(trim_zv); member = tmp_zv; } @@ -1736,7 +1736,7 @@ { php_info_print_table_start(); php_info_print_table_header(2, Simplexml support, enabled); - php_info_print_table_row(2, Revision, $Revision: 1.154 $); + php_info_print_table_row(2, Revision, $Revision: 1.155 $); php_info_print_table_row(2, Schema support, #ifdef LIBXML_SCHEMAS_ENABLED enabled); http://cvs.php.net/diff.php/php-src/ext/standard/http_fopen_wrapper.c?r1=1.99r2=1.100ty=u Index: php-src/ext/standard/http_fopen_wrapper.c diff -u php-src/ext/standard/http_fopen_wrapper.c:1.99 php-src/ext/standard/http_fopen_wrapper.c:1.100 --- php-src/ext/standard/http_fopen_wrapper.c:1.99 Wed Aug 3 10:08:04 2005 +++ php-src/ext/standard/http_fopen_wrapper.c Tue Aug 16 02:04:59 2005 @@ -19,7 +19,7 @@ | Sara Golemon [EMAIL PROTECTED] | +--+ */ -/* $Id: http_fopen_wrapper.c,v 1.99 2005/08/03 14:08:04 sniper Exp $ */ +/* $Id: http_fopen_wrapper.c,v 1.100 2005/08/16 06:04:59 rolland Exp $ */ #include php.h #include 
php_globals.h @@ -265,7 +265,7 @@ Z_STRLEN_PP(tmpzval)) { /* Remove newlines and spaces from start and end, php_trim will estrndup() */ - tmp = php_trim(Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), NULL, 0, NULL, 3 TSRMLS_CC); + tmp = php_trim(Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), NULL, 0, IS_STRING, NULL, 3 TSRMLS_CC); if (strlen(tmp) 0) { if (!header_init) { /* Remove post headers for redirects */ int l = strlen(tmp); @@ -291,7 +291,7 @@ } } efree(tmp_c); - tmp_c = php_trim(tmp, strlen(tmp), NULL, 0, NULL, 3 TSRMLS_CC); + tmp_c = php_trim(tmp, strlen(tmp), NULL, 0, IS_STRING, NULL, 3 TSRMLS_CC); efree(tmp); tmp = tmp_c; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/standard string.c
rolland Tue Aug 16 08:04:14 2005 EDT Modified files: /php-src/ext/standard string.c Log: Unicode capable impl of implode() http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.454r2=1.455ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.454 php-src/ext/standard/string.c:1.455 --- php-src/ext/standard/string.c:1.454 Tue Aug 16 02:02:56 2005 +++ php-src/ext/standard/string.c Tue Aug 16 08:04:13 2005 @@ -18,7 +18,7 @@ +--+ */ -/* $Id: string.c,v 1.454 2005/08/16 06:02:56 rolland Exp $ */ +/* $Id: string.c,v 1.455 2005/08/16 12:04:13 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -1141,36 +1141,115 @@ /* {{{ php_implode */ -PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value) +PHPAPI void php_implode(zval *delim, zval *arr, zval *retval) { - zval **tmp; - HashPosition pos; - smart_str implstr = {0}; - intnumelems, i = 0; + zend_uchar return_type; + int numelems, i; + HashPositionpos; + zval**tmp; + void*elem; + int32_t elem_chars, elem_len; + + if (Z_TYPE_P(delim) != IS_UNICODE Z_TYPE_P(delim) != IS_BINARY) { + convert_to_string_ex(delim); + } + Z_TYPE_P(retval) = return_type = Z_TYPE_P(delim); /* ... 
to start off */ + + /* Setup return value */ + if (return_type == IS_UNICODE) { + ZVAL_EMPTY_UNICODE(retval); + } else if (return_type == IS_BINARY) { + ZVAL_EMPTY_BINARY(retval); + } else { + ZVAL_EMPTY_STRING(retval); + } numelems = zend_hash_num_elements(Z_ARRVAL_P(arr)); - if (numelems == 0) { - RETURN_EMPTY_STRING(); + return; } zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), pos); - - while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) tmp, pos) == SUCCESS) { - if ((*tmp)-type != IS_STRING) { - SEPARATE_ZVAL(tmp); - convert_to_string(*tmp); - } - - smart_str_appendl(implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp)); - if (++i != numelems) { - smart_str_appendl(implstr, Z_STRVAL_P(delim), Z_STRLEN_P(delim)); + for (i = 1 ; i = numelems ; i++) { + if (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **)tmp, pos) != SUCCESS) { + /* Shouldn't happen ? */ + return; } zend_hash_move_forward_ex(Z_ARRVAL_P(arr), pos); + if (Z_TYPE_PP(tmp) != return_type) { + /* Convert to common type, if possible */ + if (return_type == IS_UNICODE) { + if (Z_TYPE_PP(tmp) == IS_BINARY) { + /* ERROR */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed string types); + efree(Z_USTRVAL_P(retval)); + ZVAL_FALSE(retval); + return; + } else { + SEPARATE_ZVAL(tmp); + convert_to_unicode_ex(tmp); + } + } else if (return_type == IS_BINARY) { + if (Z_TYPE_PP(tmp) == IS_UNICODE || Z_TYPE_PP(tmp) == IS_STRING) { + /* ERROR */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed string types); + efree(Z_BINVAL_P(retval)); + ZVAL_FALSE(retval); + return; + } else { + SEPARATE_ZVAL(tmp); + convert_to_binary_ex(tmp); + } + } else { + if (Z_TYPE_PP(tmp) == IS_UNICODE) { + /* Convert IS_STRING up to IS_UNICODE */ + convert_to_unicode_ex(retval); + convert_to_unicode_ex(delim); + Z_TYPE_P(retval) = return_type = IS_UNICODE; + } else if (Z_TYPE_PP(tmp) == IS_BINARY) { + /* ERROR */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, Mixed