[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-24 Thread Rolland Santimano
rolland Mon Oct 24 10:35:05 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of strrchr()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.503&r2=1.504&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.503 php-src/ext/standard/string.c:1.504
--- php-src/ext/standard/string.c:1.503 Sat Oct 22 09:36:55 2005
+++ php-src/ext/standard/string.c   Mon Oct 24 10:35:02 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.503 2005/10/22 13:36:55 rolland Exp $ */
+/* $Id: string.c,v 1.504 2005/10/24 14:35:02 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2714,30 +2714,82 @@
 }
 /* }}} */
 
+/* {{{ php_u_strrchr
+ */
+UChar *php_u_strrchr(UChar *s, UChar32 ch, int32_t s_len)
+{
+   UChar32 ch1;
+   int32_t i = s_len;
+
+   while (i > 0) {
+   U16_PREV(s, 0, i, ch1);
+   if (ch1 == ch) {
+   return (s+i);
+   }
+   }
+   return NULL;
+}
+/* }}} */
+
 /* {{{ proto string strrchr(string haystack, string needle)
Finds the last occurrence of a character in a string within another */
 PHP_FUNCTION(strrchr)
 {
-   zval **haystack, **needle;
-   char *found = NULL;
-   long found_offset;
-   
-   if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, haystack, 
needle) ==
-   FAILURE) {
+   zval *haystack, *needle;
+   zend_uchar str_type;
+   UChar32 ch;
+   void *found = NULL;
+   int32_t found_offset;
+
+   if (ZEND_NUM_ARGS() != 2 || zend_parse_parameters(2 TSRMLS_CC, "zz", 
&haystack, &needle) == FAILURE) {
WRONG_PARAM_COUNT;
}
-   convert_to_string_ex(haystack);
+   if (Z_TYPE_P(haystack) != IS_UNICODE || Z_TYPE_P(haystack) != IS_BINARY 
|| Z_TYPE_P(haystack) != IS_STRING) {
+   convert_to_string(haystack);
+   }
 
-   if (Z_TYPE_PP(needle) == IS_STRING) {
-   found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle));
+   if (Z_TYPE_P(needle) == IS_UNICODE || Z_TYPE_P(needle) == IS_BINARY || 
Z_TYPE_P(needle) == IS_STRING) {
+   if (Z_TYPE_P(needle) != Z_TYPE_P(haystack)) {
+   str_type = zend_get_unified_string_type(2 TSRMLS_CC, 
Z_TYPE_P(haystack), Z_TYPE_P(needle));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_explicit_type(haystack, IS_BINARY);
+   convert_to_explicit_type(needle, IS_BINARY);
+   } else {
+   convert_to_explicit_type(haystack, str_type);
+   convert_to_explicit_type(needle, str_type);
+   }
+   }
+   if (Z_TYPE_P(haystack) == IS_UNICODE) {
+   U16_GET(Z_USTRVAL_P(needle), 0, 0, Z_USTRLEN_P(needle), 
ch);
+   found = php_u_strrchr(Z_USTRVAL_P(haystack), ch, 
Z_USTRLEN_P(haystack));
+   } else {
+   found = strrchr(Z_STRVAL_P(haystack), 
*Z_STRVAL_P(needle));
+   }
} else {
-   convert_to_long_ex(needle);
-   found = strrchr(Z_STRVAL_PP(haystack), (char) 
Z_LVAL_PP(needle));
+   convert_to_long(needle);
+   if (Z_TYPE_P(haystack) == IS_UNICODE) {
+   if (Z_LVAL_P(needle) < 0 || Z_LVAL_P(needle) > 
0x10FFFF) {
+   php_error(E_WARNING, "Needle argument codepoint 
value out of range (0 - 0x10FFFF)");
+   RETURN_FALSE;
+   }
+   found = php_u_strrchr(Z_USTRVAL_P(haystack), 
(UChar32)Z_LVAL_P(needle), Z_USTRLEN_P(haystack));
+   } else {
+   found = strrchr(Z_STRVAL_P(haystack), 
(char)Z_LVAL_P(needle));
+   }
}
 
if (found) {
-   found_offset = found - Z_STRVAL_PP(haystack);
-   RETURN_STRINGL(found, Z_STRLEN_PP(haystack) - found_offset, 1);
+   if (Z_TYPE_P(haystack) == IS_UNICODE) {
+   found_offset = (UChar *)found - Z_USTRVAL_P(haystack);
+   RETURN_UNICODEL((UChar *)found, Z_USTRLEN_P(haystack) - 
found_offset, 1);
+   } else {
+   found_offset = (char *)found - Z_STRVAL_P(haystack);
+   if (Z_TYPE_P(haystack) == IS_BINARY) {
+   RETURN_BINARYL((char *)found, 
Z_BINLEN_P(haystack) - found_offset, 1);
+   } else {
+   RETURN_STRINGL((char *)found, 
Z_STRLEN_P(haystack) - found_offset, 1);
+   }
+   }
} else {
RETURN_FALSE;
}

-- 
PHP CVS Mailing List (http://www.php.net/)

[PHP-CVS] cvs: php-src / unicode-progress.txt

2005-10-24 Thread Rolland Santimano
rolland Mon Oct 24 10:51:18 2005 EDT

  Modified files:  
/php-src   unicode-progress.txt 
  Log:
  - strip_tags(), str[c]spn(), strpbrk(), strrchr(), strrpos(), strtok()
  
  
  
http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.10&r2=1.11&ty=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.10 php-src/unicode-progress.txt:1.11
--- php-src/unicode-progress.txt:1.10   Mon Oct  3 02:19:51 2005
+++ php-src/unicode-progress.txt   Mon Oct 24 10:51:17 2005
@@ -17,16 +17,23 @@
 similar_text()
 str_pad()
 str_repeat()
+strip_tags()
+strcspn()
 stripslashes()
+strpbrk()
 strpos()
+strrchr()
 strrev()
+strrpos()
+strspn()
 strstr()
+strtok()
 substr()
 substr_count()
 substr_replace()
 trim()
-ucwords()
 ucfirst()
+ucwords()
 
 
 Zend Engine

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



Re: [PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-22 Thread Rolland Santimano
--- Jani Taskinen [EMAIL PROTECTED] wrote:
> 
>  What's a codept ??
> 
>  --Jani

Ah, that would be codepoint

 On Sat, 22 Oct 2005, Rolland Santimano wrote:
 
 
  rolland Sat Oct 22 01:52:55 2005 EDT
 
   Modified files:
 /php-src/ext/standard   string.c
   Log:
   - php_u_stristr: Code comments
 
 
 

http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.499&r2=1.500&ty=u
  Index: php-src/ext/standard/string.c
  diff -u php-src/ext/standard/string.c:1.499
 php-src/ext/standard/string.c:1.500
  --- php-src/ext/standard/string.c:1.499 Thu Oct 20 15:25:52 2005
  +++ php-src/ext/standard/string.c   Sat Oct 22 01:52:53 2005
  @@ -18,7 +18,7 @@


+--+
   */
 
  -/* $Id: string.c,v 1.499 2005/10/20 19:25:52 rolland Exp $ */
  +/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */
 
  /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
  @@ -1924,20 +1924,33 @@
 
  /* Have to do this by hand since lower-casing can change lengths
 by changing codepoints, and an offset within the lower-case 
  -  upper-case strings might be different codepoints
  +  upper-case strings might be different codepoints.
  +
  +  Find an occurrence of the first codept of 't' in 's', and
  +  starting from this point, match the rest of the codepts of
 't'
  +  with those in 's'. Comparisons are performed against
 lower-case
  +  equivalents of the codepoints being matched.
  +
  +  'i'  'j' are indices used for extracting codepts 'ch1' 
  +  'ch2'. 'last' is offset in 's' where the search for 't'
  +  started, and indicates beginning of 't' in 's' for a
 successful
  +  match.
  */
  +

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-22 Thread Rolland Santimano
rolland Sat Oct 22 09:25:02 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of strpbrk()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.500&r2=1.501&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.500 php-src/ext/standard/string.c:1.501
--- php-src/ext/standard/string.c:1.500 Sat Oct 22 01:52:53 2005
+++ php-src/ext/standard/string.c   Sat Oct 22 09:25:00 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */
+/* $Id: string.c,v 1.501 2005/10/22 13:25:00 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -6585,11 +6585,14 @@
Search a string for any of a set of characters */
 PHP_FUNCTION(strpbrk)
 {
-   char *haystack, *char_list;
-   int haystack_len, char_list_len;
-   char *p;
+   void *haystack, *char_list;
+   int32_t haystack_len, char_list_len;
+   zend_uchar haystack_type, char_list_type;
+   void *p = NULL;

-   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ss, haystack, 
haystack_len, char_list, char_list_len) == FAILURE) {
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, TT,
+ haystack, 
haystack_len, haystack_type,
+ char_list, 
char_list_len, char_list_type) == FAILURE) {
RETURN_FALSE;
}
 
@@ -6598,8 +6601,36 @@
RETURN_FALSE;   
}
 
-   if ((p = strpbrk(haystack, char_list))) {
-   RETURN_STRINGL(p, (haystack + haystack_len - p), 1);
+   if (haystack_type == IS_UNICODE) {
+   int32_t i, j;
+   UChar32 ch1, ch2;
+
+   for (i = 0 ; i < haystack_len ; ) {
+   U16_NEXT((UChar *)haystack, i, haystack_len, ch1);
+   for (j = 0 ; j < char_list_len ; ) {
+   U16_NEXT((UChar *)char_list, j, char_list_len, 
ch2);
+   if (ch1 == ch2) {
+   U16_BACK_1((UChar *)haystack, 0, i);
+   p = (UChar *)haystack + i;
+   break;
+   }
+   }
+   if (ch1 == ch2) {
+   break;
+   }
+   }
+   } else {
+   p = strpbrk((char *)haystack, (char *)char_list);
+   }
+
+   if (p) {
+   if (haystack_type == IS_UNICODE) {
+   RETURN_UNICODEL((UChar *)p, ((UChar *)haystack + 
haystack_len - (UChar *)p), 1);
+   } else if (haystack_type == IS_BINARY) {
+   RETURN_BINARYL((char *)p, ((char *)haystack + 
haystack_len - (char *)p), 1);
+   } else {
+   RETURN_STRINGL((char *)p, ((char *)haystack + 
haystack_len - (char *)p), 1);
+   }
} else {
RETURN_FALSE;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-22 Thread Rolland Santimano
rolland Sat Oct 22 09:32:52 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - php_u_stristr: s/codepts/codepoints/ - make Jani happy
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.501&r2=1.502&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.501 php-src/ext/standard/string.c:1.502
--- php-src/ext/standard/string.c:1.501 Sat Oct 22 09:25:00 2005
+++ php-src/ext/standard/string.c   Sat Oct 22 09:32:51 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.501 2005/10/22 13:25:00 rolland Exp $ */
+/* $Id: string.c,v 1.502 2005/10/22 13:32:51 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -1917,7 +1917,7 @@
 
 /* {{{ php_u_stristr
Unicode version of case insensitve strstr */
-PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len)
+\PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len)
 {
int32_t i,j, last;
UChar32 ch1, ch2;
@@ -1926,12 +1926,12 @@
   by changing codepoints, and an offset within the lower-case &
   upper-case strings might be different codepoints.
 
-  Find an occurrence of the first codept of 't' in 's', and
-  starting from this point, match the rest of the codepts of 't'
-  with those in 's'. Comparisons are performed against lower-case
-  equivalents of the codepoints being matched.
+  Find an occurrence of the first codepoint of 't' in 's', and
+  starting from this point, match the rest of the codepoints of
+  't' with those in 's'. Comparisons are performed against
+  lower-case equivalents of the codepoints being matched.
 
-  'i' & 'j' are indices used for extracting codepts 'ch1' &
+  'i' & 'j' are indices used for extracting codepoints 'ch1' &
   'ch2'. 'last' is offset in 's' where the search for 't'
   started, and indicates beginning of 't' in 's' for a successful
   match.

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-22 Thread Rolland Santimano
rolland Sat Oct 22 09:36:57 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - php_u_stristr: Remove leading back-slash ... sheesh
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.502&r2=1.503&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.502 php-src/ext/standard/string.c:1.503
--- php-src/ext/standard/string.c:1.502 Sat Oct 22 09:32:51 2005
+++ php-src/ext/standard/string.c   Sat Oct 22 09:36:55 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.502 2005/10/22 13:32:51 rolland Exp $ */
+/* $Id: string.c,v 1.503 2005/10/22 13:36:55 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -1917,7 +1917,7 @@
 
 /* {{{ php_u_stristr
Unicode version of case insensitve strstr */
-\PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len)
+PHPAPI UChar *php_u_stristr(UChar *s, UChar *t, int32_t s_len, int32_t t_len)
 {
int32_t i,j, last;
UChar32 ch1, ch2;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-21 Thread Rolland Santimano
rolland Sat Oct 22 01:52:55 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - php_u_stristr: Code comments
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.499&r2=1.500&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.499 php-src/ext/standard/string.c:1.500
--- php-src/ext/standard/string.c:1.499 Thu Oct 20 15:25:52 2005
+++ php-src/ext/standard/string.c   Sat Oct 22 01:52:53 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.499 2005/10/20 19:25:52 rolland Exp $ */
+/* $Id: string.c,v 1.500 2005/10/22 05:52:53 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -1924,20 +1924,33 @@
 
/* Have to do this by hand since lower-casing can change lengths
   by changing codepoints, and an offset within the lower-case 
-  upper-case strings might be different codepoints
+  upper-case strings might be different codepoints.
+
+  Find an occurrence of the first codept of 't' in 's', and
+  starting from this point, match the rest of the codepts of 't'
+  with those in 's'. Comparisons are performed against lower-case
+  equivalents of the codepoints being matched.
+
+  'i' & 'j' are indices used for extracting codepts 'ch1' &
+  'ch2'. 'last' is offset in 's' where the search for 't'
+  started, and indicates beginning of 't' in 's' for a successful
+  match.
*/
+
i = 0;
while (i <= (s_len-t_len)) {
last = i;
U16_NEXT(s, i, s_len, ch1);
-   U16_GET(t, 0, 0, t_len, ch2);
+   j = 0;
+   U16_NEXT(t, j, t_len, ch2);
if (u_tolower(ch1) == u_tolower(ch2)) {
-   j = 0;
-   U16_FWD_1(t, j, t_len);
while (j < t_len) {
U16_NEXT(s, i, s_len, ch1);
U16_NEXT(t, j, t_len, ch2);
if (u_tolower(ch1) != u_tolower(ch2)) {
+   /* U16_NEXT() incr 'i' beyond 'ch1', 
re-adjust to
+  restart compare
+   */
U16_BACK_1(s, 0, i);
break;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-19 Thread Rolland Santimano
rolland Wed Oct 19 15:10:21 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of str[c]spn()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.497&r2=1.498&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.497 php-src/ext/standard/string.c:1.498
--- php-src/ext/standard/string.c:1.497 Mon Oct 17 15:50:13 2005
+++ php-src/ext/standard/string.c   Wed Oct 19 15:10:13 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.497 2005/10/17 19:50:13 rolland Exp $ */
+/* $Id: string.c,v 1.498 2005/10/19 19:10:13 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -209,14 +209,16 @@
 
 static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior)
 {
-   char *s11, *s22;
-   int len1, len2;
-   long start, len;
+   void *s1, *s2;
+   int32_t len1, len2;
+   zend_uchar type1, type2;
+   long start, len; /* For UNICODE, these are codepoint units */

start = 0;
len = 0;
-   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ss|ll, s11, 
len1,
-   s22, len2, start, len) == FAILURE) {
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, TT|ll,
+ s1, len1, type1, 
s2, len2, type2,
+ start, len) == 
FAILURE) {
return;
}

@@ -246,18 +248,40 @@
len = len1 - start;
}
 
-   if (behavior == STR_STRSPN) {
-   RETURN_LONG(php_strspn(s11 + start /*str1_start*/,
-   s22 /*str2_start*/,
-   s11 + start + len /*str1_end*/,
-   s22 + len2 /*str2_end*/));
-   } else if (behavior == STR_STRCSPN) {
-   RETURN_LONG(php_strcspn(s11 + start /*str1_start*/,
-   s22 /*str2_start*/,
-   s11 + start + len /*str1_end*/,
-   s22 + len2 /*str2_end*/));
+   if (type1 == IS_UNICODE) {
+   UChar *u_start, *u_end;
+   int32_t i = 0;
+
+   U16_FWD_N((UChar*)s1, i, len1, start);
+   u_start = (UChar *)s1 + i;
+   U16_FWD_N((UChar *)s1, i, len1, len);
+   u_end = (UChar *)s1 + i;
+
+   if (behavior == STR_STRSPN) {
+   RETURN_LONG(php_u_strspn(u_start /*str1_start*/,
+(UChar 
*)s2 /*str2_start*/,
+u_end 
/*str1_end*/,
+(UChar 
*)s2 + len2 /*str2_end*/));
+   } else if (behavior == STR_STRCSPN) {
+   RETURN_LONG(php_u_strcspn(u_start /*str1_start*/,
+ 
(UChar *)s2 /*str2_start*/,
+ u_end 
/*str1_end*/,
+ 
(UChar *)s2 + len2 /*str2_end*/));
+   }
+   } else {
+   if (behavior == STR_STRSPN) {
+   RETURN_LONG(php_strspn((char *)s1 + start 
/*str1_start*/,
+  (char *)s2 
/*str2_start*/,
+  (char *)s1 + 
start + len /*str1_end*/,
+  (char *)s2 + 
len2 /*str2_end*/));
+   } else if (behavior == STR_STRCSPN) {
+   RETURN_LONG(php_strcspn((char *)s1 + start 
/*str1_start*/,
+   (char 
*)s2 /*str2_start*/,
+   (char 
*)s1 + start + len /*str1_end*/,
+   (char 
*)s2 + len2 /*str2_end*/));
+   }
}
-   
+
 }
 
 /* {{{ proto int strspn(string str, string mask [, start [, len]])
@@ -1901,6 +1925,25 @@
 }
 /* }}} */
 
+/* {{{ php_u_strspn
+ */
+PHPAPI int32_t php_u_strspn(UChar *s1, UChar *s2, UChar *s1_end, UChar *s2_end)
+{
+   int32_t len1 = s1_end - s1;
+   int32_t len2 = s2_end - s2;
+   int32_t i, codepts;
+   UChar32 ch;
+
+   for (i = 0, codepts = 0 ; i < len1 ; codepts++) {
+   U16_NEXT(s1, i, len1, ch);
+   if (u_memchr32(s2, ch, len2) == NULL) {
+   break;
+ 

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-17 Thread Rolland Santimano
rolland Mon Oct 17 13:07:46 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of strrpos()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.495&r2=1.496&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.495 php-src/ext/standard/string.c:1.496
--- php-src/ext/standard/string.c:1.495 Sat Oct 15 08:50:20 2005
+++ php-src/ext/standard/string.c   Mon Oct 17 13:07:44 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.495 2005/10/15 12:50:20 derick Exp $ */
+/* $Id: string.c,v 1.496 2005/10/17 17:07:44 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2292,61 +2292,118 @@
Finds position of last occurrence of a string within another string */
 PHP_FUNCTION(strrpos)
 {
-   zval *zneedle;
-   char *needle, *haystack;
-   int needle_len, haystack_len;
+   zval *zhaystack, *zneedle;
+   void *haystack, *needle;
+   int32_t haystack_len, needle_len = 0;
+   zend_uchar str_type;
long offset = 0;
char *p, *e, ord_needle[2];
+   UChar *pos, *u_p, *u_e, u_ord_needle[3];
 
-   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, sz|l, haystack, 
haystack_len, zneedle, offset) == FAILURE) {
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, zz|l,
+ zhaystack, zneedle, 
offset) == FAILURE) {
RETURN_FALSE;
}
 
-   if (Z_TYPE_P(zneedle) == IS_STRING) {
-   needle = Z_STRVAL_P(zneedle);
-   needle_len = Z_STRLEN_P(zneedle);
+   if (Z_TYPE_P(zhaystack) != IS_UNICODE  Z_TYPE_P(zhaystack) != 
IS_BINARY  Z_TYPE_P(zhaystack) != IS_STRING) {
+   convert_to_text(zhaystack);
+   }
+   if (Z_TYPE_P(zneedle) == IS_UNICODE || Z_TYPE_P(zneedle) == IS_BINARY 
|| Z_TYPE_P(zneedle) == IS_STRING) {
+   if (Z_TYPE_P(zneedle) != Z_TYPE_P(zhaystack)) {
+   str_type = zend_get_unified_string_type(2 TSRMLS_CC, 
Z_TYPE_P(zhaystack), Z_TYPE_P(zneedle));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_explicit_type(zhaystack, IS_BINARY);
+   convert_to_explicit_type(zneedle, IS_BINARY);
+   } else {
+   convert_to_explicit_type(zhaystack, str_type);
+   convert_to_explicit_type(zneedle, str_type);
+   }
+   }
+   needle = Z_UNIVAL_P(zneedle);
+   needle_len = Z_UNILEN_P(zneedle);
} else {
-   convert_to_long(zneedle);
-   ord_needle[0] = (char)(Z_LVAL_P(zneedle)  0xFF);
-   ord_needle[1] = '\0';
-   needle = ord_needle;
-   needle_len = 1;
+   if (Z_TYPE_P(zhaystack) == IS_UNICODE) {
+   if (Z_LVAL_P(zneedle)  0 || Z_LVAL_P(zneedle)  
0x10) {
+   php_error(E_WARNING, Needle argument codepoint 
value out of range (0 - 0x10));
+   RETURN_FALSE;
+   }
+   if (U_IS_BMP(Z_LVAL_P(zneedle))) {
+   u_ord_needle[needle_len++] = 
(UChar)Z_LVAL_P(zneedle);
+   u_ord_needle[needle_len]   = 0;
+   } else {
+   u_ord_needle[needle_len++] = 
(UChar)U16_LEAD(Z_LVAL_P(zneedle));
+   u_ord_needle[needle_len++] = 
(UChar)U16_TRAIL(Z_LVAL_P(zneedle));
+   u_ord_needle[needle_len]   = 0;
+   }
+   needle = u_ord_needle;
+   } else {
+   convert_to_long(zneedle);
+   ord_needle[0] = (char)(Z_LVAL_P(zneedle)  0xFF);
+   ord_needle[1] = '\0';
+   needle = ord_needle;
+   needle_len = 1;
+   }
}
+   haystack = Z_UNIVAL_P(zhaystack);
+   haystack_len = Z_UNILEN_P(zhaystack);
 
if ((haystack_len == 0) || (needle_len == 0)) {
RETURN_FALSE;
}
 
-   if (offset = 0) {
-   p = haystack + offset;
-   e = haystack + haystack_len - needle_len;
+   if (Z_TYPE_P(zhaystack) == IS_UNICODE) {
+   if (offset = 0) {
+   u_p = (UChar *)haystack + offset;
+   u_e = (UChar *)haystack + haystack_len - needle_len;
+   } else {
+   u_p = haystack;
+   if (-offset  haystack_len) {
+   u_e = (UChar *)haystack - needle_len;
+   } else if (needle_len  -offset) {
+   

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-17 Thread Rolland Santimano
rolland Mon Oct 17 15:50:19 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of stripos()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.496&r2=1.497&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.496 php-src/ext/standard/string.c:1.497
--- php-src/ext/standard/string.c:1.496 Mon Oct 17 13:07:44 2005
+++ php-src/ext/standard/string.c   Mon Oct 17 15:50:13 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.496 2005/10/17 17:07:44 rolland Exp $ */
+/* $Id: string.c,v 1.497 2005/10/17 19:50:13 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2228,51 +2228,109 @@
Finds position of first occurrence of a string within another, case 
insensitive */
 PHP_FUNCTION(stripos)
 {
-   char *found = NULL;
-   char *haystack;
-   int haystack_len;
+   zval *haystack, *needle;
long offset = 0;
-   char *needle_dup = NULL, *haystack_dup;
+   int32_t haystack_len, needle_len = 0;
+   zend_uchar str_type;
+   void *haystack_dup, *needle_dup = NULL;
char needle_char[2];
-   zval *needle;
+   char c;
+   UChar u_needle_char[3];
+   UChar32 ch;
+   void *found = NULL;
 
-   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, sz|l, haystack, 
haystack_len, needle, offset) == FAILURE) {
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, zz|l, haystack, 
needle, offset) == FAILURE) {
return;
}
 
-   if (offset  0 || offset  haystack_len) {
+   if (Z_TYPE_P(haystack) != IS_UNICODE  Z_TYPE_P(haystack) != IS_BINARY 
 Z_TYPE_P(haystack) != IS_STRING) {
+   convert_to_text(haystack);
+   }
+   if (offset  0 || offset  Z_UNILEN_P(haystack)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset not 
contained in string.);
RETURN_FALSE;
}
 
-   haystack_dup = estrndup(haystack, haystack_len);
-   php_strtolower(haystack_dup, haystack_len);
-
-   if (Z_TYPE_P(needle) == IS_STRING) {
-   needle_dup = estrndup(Z_STRVAL_P(needle), Z_STRLEN_P(needle));
-   php_strtolower(needle_dup, Z_STRLEN_P(needle));
-   found = php_memnstr(haystack_dup + offset, needle_dup, 
Z_STRLEN_P(needle), haystack_dup + haystack_len);
+   if (Z_TYPE_P(needle) == IS_UNICODE || Z_TYPE_P(needle) == IS_BINARY || 
Z_TYPE_P(needle) == IS_STRING) {
+   if (!Z_UNILEN_P(needle)) {
+   RETURN_FALSE;
+   }
+   if (Z_TYPE_P(haystack) != Z_TYPE_P(needle)) {
+   str_type = zend_get_unified_string_type(2 TSRMLS_CC, 
Z_TYPE_P(haystack), Z_TYPE_P(needle));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_explicit_type(haystack, IS_BINARY);
+   convert_to_explicit_type(needle, IS_BINARY);
+   } else {
+   convert_to_explicit_type(haystack, str_type);
+   convert_to_explicit_type(needle, str_type);
+   }
+   }
+   haystack_len = Z_UNILEN_P(haystack);
+   needle_len = Z_UNILEN_P(needle);
+   if (Z_TYPE_P(haystack) == IS_UNICODE) {
+   haystack_dup = eustrndup(Z_USTRVAL_P(haystack), 
haystack_len);
+   php_u_strtolower((UChar **)haystack_dup, 
haystack_len, UG(default_locale));
+   needle_dup = eustrndup(Z_STRVAL_P(needle), needle_len);
+   php_u_strtolower((UChar **)needle_dup, needle_len, 
UG(default_locale));
+   found = zend_u_memnstr((UChar *)haystack_dup + offset,
+  (UChar 
*)needle_dup, needle_len,
+  (UChar 
*)haystack_dup + haystack_len);
+   } else {
+   haystack_dup = estrndup(Z_STRVAL_P(haystack), 
haystack_len);
+   php_strtolower((char *)haystack_dup, haystack_len);
+   needle_dup = estrndup(Z_STRVAL_P(needle), needle_len);
+   php_strtolower((char *)needle_dup, Z_STRLEN_P(needle));
+   found = php_memnstr((char *)haystack_dup + offset,
+   (char 
*)needle_dup, needle_len,
+   (char 
*)haystack_dup + haystack_len);
+   }
} else {
switch (Z_TYPE_P(needle)) {
case IS_LONG:
case IS_BOOL:
-   needle_char[0] = tolower((char) 
Z_LVAL_P(needle));
+  

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-05 Thread Rolland Santimano
rolland Wed Oct  5 08:16:03 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - strip_tags(): some fixes, still AWiP
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.492&r2=1.493&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.492 php-src/ext/standard/string.c:1.493
--- php-src/ext/standard/string.c:1.492 Tue Oct  4 02:33:10 2005
+++ php-src/ext/standard/string.c   Wed Oct  5 08:16:02 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.492 2005/10/04 06:33:10 dmitry Exp $ */
+/* $Id: string.c,v 1.493 2005/10/05 12:16:02 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -4803,8 +4803,8 @@
Strips HTML and PHP tags from a string */
 PHP_FUNCTION(strip_tags)
 {
-   void *str, *allow=NULL;
-   int32_t str_len, allow_len;
+   void *str, *allow = NULL;
+   int32_t str_len, allow_len = 0;
zend_uchar str_type, allow_type;
void *buf;
int32_t retval_len;
@@ -4982,7 +4982,8 @@
 int php_u_tag_find(UChar *tag, int32_t len, UChar *set, int32_t set_len)
 {
int32_t idx = 0;
-   UChar32 ch, *norm, *n;
+   UChar32 ch;
+   UChar *norm, *n;
int state = 0, done = 0;
 
if (!len) {
@@ -5018,9 +5019,9 @@
}
}
*(n++) = '>';
-   *n = '\0';
+   *n = 0;
 
-   if (u_strFindFirst(tag, len, set, set_len) != NULL) {
+   if (u_strFindFirst(set, set_len, norm, n-norm) != NULL) {
done = 1;
} else {
done = 0;
@@ -5121,7 +5122,7 @@
 
buf = eustrndup(rbuf, len);
rp = rbuf;
-   if (allow) {
+   if (allow_len != 0) {
php_u_strtolower(allow, allow_len, UG(default_locale));
tbuf = eumalloc(PHP_TAG_BUF_SIZE+1);
tp = tbuf;
@@ -5147,7 +5148,7 @@
if (state == 0) {
last = 0x3C;
state = 1;
-   if (allow) {
+   if (allow_len) {
tp = ((tp-tbuf) = 
UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp);
*(tp++) = ch;
}
@@ -5162,7 +5163,7 @@
last = 0x28;
br++;
}
-   } else if (allow  state == 1) {
+   } else if (allow_len  state == 1) {
tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? 
tbuf: tp);
*(tp++) = ch;
} else if (state == 0) {
@@ -5176,7 +5177,7 @@
last = ch;
br--;
}
-   } else if (allow  state == 1) {
+   } else if (allow_len  state == 1) {
tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? 
tbuf: tp);
*(tp++) = ch;
} else if (state == 0) {
@@ -5194,7 +5195,7 @@
case 1: /* HTML/XML */
last = ch;
state = 0;
-   if (allow) {
+   if (allow_len) {
tp = ((tp-tbuf) = 
UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp);
*(tp++) = ch;
*(tp) = 0;
@@ -5241,7 +5242,7 @@
}
} else if (state == 0) {
*(rp++) = ch;
-   } else if (allow  state == 1) {
+   } else if (allow_len  state == 1) {
tp = ((tp-tbuf) = UBYTES(PHP_TAG_BUF_SIZE) ? 
tbuf: tp);
*(tp++) = ch;
}
@@ -5254,10 +5255,10 @@
last = ch;
} else {
if (state == 0) {
-   (*rp++) = 0x21;
-   } else if (allow  state == 1) {
-   tp = ((tp-tbuf) = PHP_TAG_BUF_SIZE ? 
tbuf: tp);
-   *(tp++) = 0x21;
+   *(rp++) = ch;
+   } else if (allow_len  state == 1) {
+   tp = ((tp-tbuf) = 
UBYTES(PHP_TAG_BUF_SIZE) ? tbuf: tp);
+   *(tp++) = ch;
}
}
break;
@@ -5310,7 +5311,7 @@
 reg_u_char:

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-03 Thread Rolland Santimano
rolland Mon Oct  3 02:14:12 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of similar_text()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.489&r2=1.490&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.489 php-src/ext/standard/string.c:1.490
--- php-src/ext/standard/string.c:1.489 Fri Sep 30 09:19:15 2005
+++ php-src/ext/standard/string.c   Mon Oct  3 02:14:10 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.489 2005/09/30 13:19:15 rolland Exp $ */
+/* $Id: string.c,v 1.490 2005/10/03 06:14:10 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3410,6 +3410,42 @@
 }
 /* }}} */
 
+/* {{{ php_u_similar_str
+ */
+static void php_u_similar_str(const UChar *txt1, int32_t len1,
+ const UChar *txt2, 
int32_t len2,
+ int32_t *pos1, 
int32_t *end1,
+ int32_t *pos2, 
int32_t *end2, int *max)
+{
+   int32_t i1, i2, j1, j2, l;
+   UChar32 ch1, ch2;
+
+   *max = 0;
+   for (i1 = 0 ; i1 < len1 ; ) {
+   for (i2 = 0 ; i2 < len2 ; ) {
+   l = 0 ; j1 = 0 ; j2 = 0;
+   while ((i1+j1 < len1) && (i2+j2 < len2)) {
+   U16_NEXT(txt1+i1, j1, len1-i1, ch1);
+   U16_NEXT(txt2+i2, j2, len2-i2, ch2);
+   if (ch1 != ch2) {
+   U16_BACK_1(txt1+i1, 0, j1);
+   U16_BACK_1(txt2+i2, 0, j2);
+   break;
+   }
+   l++;
+   }
+   if (l > *max) {
+   *max = l;
+   *pos1 = i1; *end1 = j1;
+   *pos2 = i2; *end2 = j2;
+   }
+   U16_FWD_1(txt2, i2, len2);
+   }
+   U16_FWD_1(txt1, i1, len1);
+   }
+}
+/* }}} */
+
 /* {{{ php_similar_str
  */
 static void php_similar_str(const char *txt1, int len1, const char *txt2, int 
len2, int *pos1, int *pos2, int *max)
@@ -3433,6 +3469,27 @@
 }
 /* }}} */
 
+/* {{{ php_u_similar_char
+ */
+static int php_u_similar_char(const UChar *txt1, int32_t len1, const UChar 
*txt2, int32_t len2)
+{
+   int sum, max;
+   int32_t pos1, pos2, end1, end2;
+
+   php_u_similar_str(txt1, len1, txt2, len2, &pos1, &end1, &pos2, &end2, 
&max);
+   if ((sum = max)) {
+   if (pos1 && pos2) {
+   sum += php_u_similar_char(txt1, pos1, txt2, pos2);
+   }
+   if ((pos1 + end1 < len1) && (pos2 + end2 < len2)) {
+   sum += php_similar_char((UChar *)txt1+pos1+end1, 
len1-pos1-end1,
+   (UChar 
*)txt2+pos2+end2, len2-pos2-end2);
+   }
+   }
+   return sum;
+}
+/* }}} */
+
 /* {{{ php_similar_char
  */
 static int php_similar_char(const char *txt1, int len1, const char *txt2, int 
len2)
@@ -3463,30 +3520,49 @@
zval **t1, **t2, **percent;
int ac = ZEND_NUM_ARGS();
int sim;
-   
+   zend_uchar str_type;
+
if (ac  2 || ac  3 || zend_get_parameters_ex(ac, t1, t2, percent) 
== FAILURE) {
WRONG_PARAM_COUNT;
}   
-
-   convert_to_string_ex(t1);
-   convert_to_string_ex(t2);
-
+   if (Z_TYPE_PP(t1) != IS_UNICODE  Z_TYPE_PP(t1) != IS_BINARY  
Z_TYPE_PP(t1) != IS_STRING) {
+   convert_to_text_ex(t1);
+   }
+   if (Z_TYPE_PP(t2) != IS_UNICODE  Z_TYPE_PP(t2) != IS_BINARY  
Z_TYPE_PP(t2) != IS_STRING) {
+   convert_to_text_ex(t2);
+   }
+   str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(t1), 
Z_TYPE_PP(t2));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_binary_ex(t1);
+   convert_to_binary_ex(t2);
+   } else {
+   convert_to_explicit_type_ex(t1, str_type);
+   convert_to_explicit_type_ex(t2, str_type);
+   }
if (ac  2) {
convert_to_double_ex(percent);
}
-   
-   if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) {
+
+   if (Z_UNILEN_PP(t1) + Z_UNILEN_PP(t2) == 0) {
if (ac  2) {
Z_DVAL_PP(percent) = 0;
}
 
RETURN_LONG(0);
}
-   
-   sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), 
Z_STRVAL_PP(t2), Z_STRLEN_PP(t2)); 
+
+   if (str_type == IS_UNICODE) {
+   sim = php_u_similar_char(Z_USTRVAL_PP(t1), Z_USTRLEN_PP(t1), 
Z_USTRVAL_PP(t2), 

[PHP-CVS] cvs: php-src / unicode-progress.txt

2005-10-03 Thread Rolland Santimano
rolland Mon Oct  3 02:19:51 2005 EDT

  Modified files:  
/php-srcunicode-progress.txt 
  Log:
  levenshtein(), similar_text()
  
  
http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.9&r2=1.10&ty=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.9 php-src/unicode-progress.txt:1.10
--- php-src/unicode-progress.txt:1.9   Wed Sep 28 08:25:34 2005
+++ php-src/unicode-progress.txt   Mon Oct  3 02:19:51 2005
@@ -11,8 +11,10 @@
 chr()
 explode()
 implode()
+levenshtein()
 ord()
 range()
+similar_text()
 str_pad()
 str_repeat()
 stripslashes()

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-10-03 Thread Rolland Santimano
rolland Mon Oct  3 13:00:05 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of strip_tags()
  
  http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.490&r2=1.491&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.490 php-src/ext/standard/string.c:1.491
--- php-src/ext/standard/string.c:1.490 Mon Oct  3 02:14:10 2005
+++ php-src/ext/standard/string.c   Mon Oct  3 13:00:00 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.490 2005/10/03 06:14:10 rolland Exp $ */
+/* $Id: string.c,v 1.491 2005/10/03 17:00:00 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -4803,34 +4803,30 @@
Strips HTML and PHP tags from a string */
 PHP_FUNCTION(strip_tags)
 {
-   char *buf;
-   zval **str, **allow=NULL;
-   char *allowed_tags=NULL;
-   int allowed_tags_len=0;
-   size_t retval_len;
+   void *str, *allow=NULL;
+   int32_t str_len, allow_len;
+   zend_uchar str_type, allow_type;
+   void *buf;
+   int32_t retval_len;
 
-   switch (ZEND_NUM_ARGS()) {
-   case 1:
-   if (zend_get_parameters_ex(1, str) == FAILURE) {
-   RETURN_FALSE;
-   }
-   break;
-   case 2:
-   if (zend_get_parameters_ex(2, str, allow) == FAILURE) 
{
-   RETURN_FALSE;
-   }
-   convert_to_string_ex(allow);
-   allowed_tags = Z_STRVAL_PP(allow);
-   allowed_tags_len = Z_STRLEN_PP(allow);
-   break;
-   default:
-   WRONG_PARAM_COUNT;
-   break;
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, T|T, str, 
str_len, str_type,
+ allow, allow_len, 
allow_type) == FAILURE) {
+   return;
+   }
+
+   if (str_type == IS_UNICODE) {
+   buf = eustrndup(str, str_len);
+   retval_len = php_u_strip_tags((UChar *)buf, str_len, NULL, 
(UChar *)allow, allow_len TSRMLS_CC);
+   RETURN_UNICODEL((UChar *)buf, retval_len, 0);
+   } else {
+   buf = estrndup(str, str_len);
+   retval_len = php_strip_tags((char *)buf, str_len, NULL, (char 
*)allow, allow_len);
+   if (str_type == IS_BINARY) {
+   RETURN_BINARYL((char *)buf, retval_len, 0);
+   } else {
+   RETURN_STRINGL((char *)buf, retval_len, 0);
+   }
}
-   convert_to_string_ex(str);
-   buf = estrndup(Z_STRVAL_PP(str), Z_STRLEN_PP(str));
-   retval_len = php_strip_tags(buf, Z_STRLEN_PP(str), NULL, allowed_tags, 
allowed_tags_len);
-   RETURN_STRINGL(buf, retval_len, 0);
 }
 /* }}} */
 
@@ -4971,15 +4967,72 @@
 
 #define PHP_TAG_BUF_SIZE 1023
 
-/* {{{ php_tag_find
+/* php_u_tag_find / php_tag_find
  *
  * Check if tag is in a set of tags 
  *
  * states:
- * 
+ *
  * 0 start tag
  * 1 first non-whitespace char seen
  */
+
+/* {{{ php_u_tag_find
+ */
+int php_u_tag_find(UChar *tag, int32_t len, UChar *set, int32_t set_len)
+{
+   int32_t idx = 0;
+   UChar32 ch, *norm, *n;
+   int state = 0, done = 0;
+
+   if (!len) {
+   return 0;
+   }
+
+   norm = eumalloc(len+1);
+   n = norm;
+
+   while (!done) {
+   U16_NEXT(tag, idx, len, ch);
+   switch (u_tolower(ch)) {
+   case '':
+   *(n++) = ch;
+   break;
+   case '':
+   done = 1;
+   break;
+   default:
+   if (u_isWhitespace(ch) == FALSE) {
+   if (state == 0) {
+   state = 1;
+   if (ch != '/')
+   *(n++) = ch;
+   } else {
+   *(n++) = ch;
+   }
+   } else {
+   if (state == 1)
+   done = 1;
+   }
+   break;
+   }
+   }
+   *(n++) = '';
+   *n = '\0';
+
+   if (u_strFindFirst(tag, len, set, set_len) != NULL) {
+   done = 1;
+   } else {
+   done = 0;
+   }
+
+   efree(norm);
+   return done;
+}
+/* }}} */
+
+/* {{{ php_tag_find
+ */
 int php_tag_find(char *tag, int len, char *set) {
char c, *n, *t;
int state=0, done=0;
@@ -5033,7 +5086,7 @@
 }
 /* }}} */
 
-/* {{{ php_strip_tags
+/* php_u_strip_tags / 

[PHP-CVS] cvs: php-src /ext/standard levenshtein.c

2005-09-30 Thread Rolland Santimano
rolland Fri Sep 30 02:20:49 2005 EDT

  Modified files:  
/php-src/ext/standard   levenshtein.c 
  Log:
  - Unicode impl of levenshtein()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/levenshtein.c?r1=1.34&r2=1.35&ty=u
Index: php-src/ext/standard/levenshtein.c
diff -u php-src/ext/standard/levenshtein.c:1.34 
php-src/ext/standard/levenshtein.c:1.35
--- php-src/ext/standard/levenshtein.c:1.34 Wed Aug  3 10:08:08 2005
+++ php-src/ext/standard/levenshtein.c  Fri Sep 30 02:20:47 2005
@@ -15,7 +15,7 @@
| Author: Hartmut Holzgraefe [EMAIL PROTECTED]|
+--+
  */
-/* $Id: levenshtein.c,v 1.34 2005/08/03 14:08:08 sniper Exp $ */
+/* $Id: levenshtein.c,v 1.35 2005/09/30 06:20:47 rolland Exp $ */
 
 #include php.h
 #include stdlib.h
@@ -27,39 +27,58 @@
 
 /* {{{ reference_levdist
  * reference implementation, only optimized for memory usage, not speed */
-static int reference_levdist(const char *s1, int l1, 
-   
 const char *s2, int l2, 
-   
 int cost_ins, int cost_rep, int cost_del )
+static int reference_levdist(void *s1, int32_t l1, void *s2, int32_t l2, 
zend_uchar str_type, int cost_ins, int cost_rep, int cost_del )
 {
int *p1, *p2, *tmp;
-   int i1, i2, c0, c1, c2;
-   
-   if(l1==0) return l2*cost_ins;
-   if(l2==0) return l1*cost_del;
+   int32_t i1, i2, j1, j2, cp1, cp2;
+   int32_t c0, c1, c2;
+   UChar32 ch1, ch2;
+
+   if (str_type == IS_UNICODE) {
+   cp1 = u_countChar32((UChar *)s1, l1);
+   cp2 = u_countChar32((UChar *)s2, l2);
+
+   if (cp1 == 0) return cp2*cost_ins;
+   if (cp2 == 0) return cp1*cost_del;
+   if ((cp1LEVENSHTEIN_MAX_LENTH)||(cp2LEVENSHTEIN_MAX_LENTH)) {
+   return -1;
+   }
 
-   if((l1LEVENSHTEIN_MAX_LENTH)||(l2LEVENSHTEIN_MAX_LENTH))
-   return -1;
+   p1 = safe_emalloc((cp2+1), sizeof(int), 0);
+   p2 = safe_emalloc((cp2+1), sizeof(int), 0);
+   } else {
+   if (l1 == 0) return l2*cost_ins;
+   if (l2 == 0) return l1*cost_del;
+   if ((l1LEVENSHTEIN_MAX_LENTH)||(l2LEVENSHTEIN_MAX_LENTH)) {
+   return -1;
+   }
 
-   p1 = safe_emalloc((l2+1), sizeof(int), 0);
-   p2 = safe_emalloc((l2+1), sizeof(int), 0);
+   p1 = safe_emalloc((l2+1), sizeof(int), 0);
+   p2 = safe_emalloc((l2+1), sizeof(int), 0);
+   }
 
-   for(i2=0;i2=l2;i2++)
+   for (i2 = 0 ; i2 = l2 ; i2++)
p1[i2] = i2*cost_ins;
 
-   for(i1=0;i1l1;i1++)
-   {
-   p2[0]=p1[0]+cost_del;
-   for(i2=0;i2l2;i2++)
-   {
-   c0=p1[i2]+((s1[i1]==s2[i2])?0:cost_rep);
-   c1=p1[i2+1]+cost_del; if(c1c0) c0=c1;
-   c2=p2[i2]+cost_ins; if(c2c0) c0=c2;

-   p2[i2+1]=c0;
-   }
-   tmp=p1; p1=p2; p2=tmp;
+   for (i1 = 0, j1 = 0 ; i1  l1 ; i1++) {
+   p2[0] = p1[0] + cost_del;
+   if (str_type == IS_UNICODE) {
+   U16_NEXT((UChar *)s1, j1, l1, ch1);
}
-
-   c0=p1[l2];
+   for (i2 = 0, j2 = 0 ; i2  l2 ; i2++) {
+   if (str_type == IS_UNICODE) {
+   U16_NEXT((UChar *)s2, j2, l2, ch2);
+   c0 = p1[i2] + ((ch1==ch2) ? 0 : cost_rep);
+   } else {
+   c0 = p1[i2] + ((*((char *)s1+i1)==*((char 
*)s2+i2)) ? 0 : cost_rep);
+   }
+   c1 = p1[i2+1] + cost_del; if (c1  c0) c0 = c1;
+   c2 = p2[i2] + cost_ins; if (c2  c0) c0 = c2;   

+   p2[i2+1] = c0;
+   }
+   tmp=p1; p1=p2; p2=tmp;
+   }
+   c0 = p1[l2];
 
efree(p1);
efree(p2);
@@ -70,7 +89,7 @@
 
 /* {{{ custom_levdist
  */
-static int custom_levdist(char *str1, char *str2, char *callback_name 
TSRMLS_DC) 
+static int custom_levdist(void *str1, void *str2, char *callback_name 
TSRMLS_DC) 
 {
php_error_docref(NULL TSRMLS_CC, E_WARNING, The general Levenshtein 
support is not there yet);
/* not there yet */
@@ -83,56 +102,51 @@
Calculate Levenshtein distance between two strings */
 PHP_FUNCTION(levenshtein)
 {
-   zval **str1, **str2, **cost_ins, **cost_rep, **cost_del, 

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-30 Thread Rolland Santimano
rolland Fri Sep 30 09:19:19 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - substr_replace(): call correct funcn for string conversion
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.488&r2=1.489&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.488 php-src/ext/standard/string.c:1.489
--- php-src/ext/standard/string.c:1.488 Thu Sep 29 07:05:30 2005
+++ php-src/ext/standard/string.c   Fri Sep 30 09:19:15 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.488 2005/09/29 11:05:30 rolland Exp $ */
+/* $Id: string.c,v 1.489 2005/09/30 13:19:15 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2800,11 +2800,11 @@
if (tmp_repl  Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) {
str_type = zend_get_unified_string_type(2 
TSRMLS_CC, Z_TYPE_PP(str), Z_TYPE_PP(tmp_repl));
if (str_type == (zend_uchar)-1) {
-   convert_to_explicit_type(str, 
IS_BINARY);
-   convert_to_explicit_type(tmp_repl, 
IS_BINARY);
+   convert_to_explicit_type_ex(str, 
IS_BINARY);
+   convert_to_explicit_type_ex(tmp_repl, 
IS_BINARY);
} else {
-   convert_to_explicit_type(str, str_type);
-   convert_to_explicit_type(tmp_repl, 
str_type);
+   convert_to_explicit_type_ex(str, 
str_type);
+   convert_to_explicit_type_ex(tmp_repl, 
str_type);
}
}
php_adjust_limits(str, f, l);
@@ -2881,11 +2881,11 @@
if (tmp_repl  Z_TYPE_PP(tmp_str) != 
Z_TYPE_PP(tmp_repl)) {
str_type = zend_get_unified_string_type(2 
TSRMLS_CC, Z_TYPE_PP(tmp_str), Z_TYPE_PP(tmp_repl));
if (str_type == (zend_uchar)-1) {
-   convert_to_explicit_type(tmp_str, 
IS_BINARY);
-   convert_to_explicit_type(tmp_repl, 
IS_BINARY);
+   convert_to_explicit_type_ex(tmp_str, 
IS_BINARY);
+   convert_to_explicit_type_ex(tmp_repl, 
IS_BINARY);
} else {
-   convert_to_explicit_type(tmp_str, 
str_type);
-   convert_to_explicit_type(tmp_repl, 
str_type);
+   convert_to_explicit_type_ex(tmp_str, 
str_type);
+   convert_to_explicit_type_ex(tmp_repl, 
str_type);
}
}
php_adjust_limits(tmp_str, f, l);

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-29 Thread Rolland Santimano
rolland Thu Sep 29 05:33:41 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Updated addslashes(): add codepoints directly rather than with 
zend_codepoint_to_uchar()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.486&r2=1.487&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.486 php-src/ext/standard/string.c:1.487
--- php-src/ext/standard/string.c:1.486 Wed Sep 28 18:31:29 2005
+++ php-src/ext/standard/string.c   Thu Sep 29 05:33:38 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.486 2005/09/28 22:31:29 iliaa Exp $ */
+/* $Id: string.c,v 1.487 2005/09/29 09:33:38 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3817,12 +3817,12 @@
U16_NEXT(str, i, length, ch);
switch (ch) {
case '\0':
-   buf_len += zend_codepoint_to_uchar('\\', 
buf+buf_len);
-   buf_len += zend_codepoint_to_uchar('0', 
buf+buf_len);
+   *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */
+   *(buf+buf_len) = (UChar)0x30; buf_len++; /* 0 */
break;
case '\'':
-   buf_len += zend_codepoint_to_uchar('\'', 
buf+buf_len);
-   buf_len += zend_codepoint_to_uchar('\'', 
buf+buf_len);
+   *(buf+buf_len) = (UChar)0x27; buf_len++; /* ' */
+   *(buf+buf_len) = (UChar)0x27; buf_len++; /* ' */
break;
default:
buf_len += zend_codepoint_to_uchar(ch, 
buf+buf_len);
@@ -3834,13 +3834,13 @@
U16_NEXT(str, i, length, ch);
switch (ch) {
case '\0':
-   buf_len += zend_codepoint_to_uchar('\\', 
buf+buf_len);
-   buf_len += zend_codepoint_to_uchar('0', 
buf+buf_len);
+   *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */
+   *(buf+buf_len) = (UChar)0x30; buf_len++; /* 0 */
break;
case '\'':
case '\':
case '\\':
-   buf_len += zend_codepoint_to_uchar('\\', 
buf+buf_len);
+   *(buf+buf_len) = (UChar)0x5C; buf_len++; /* \ */
/* break is missing *intentionally* */
default:
buf_len += zend_codepoint_to_uchar(ch, 
buf+buf_len);

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard php_string.h string.c

2005-09-28 Thread Rolland Santimano
rolland Wed Sep 28 05:22:14 2005 EDT

  Modified files:  
/php-src/ext/standard   php_string.h string.c 
  Log:
  - Unicode impl of {add,strip}slashes()
  
  http://cvs.php.net/diff.php/php-src/ext/standard/php_string.h?r1=1.89&r2=1.90&ty=u
Index: php-src/ext/standard/php_string.h
diff -u php-src/ext/standard/php_string.h:1.89 
php-src/ext/standard/php_string.h:1.90
--- php-src/ext/standard/php_string.h:1.89  Tue Aug 16 02:02:55 2005
+++ php-src/ext/standard/php_string.h   Wed Sep 28 05:22:08 2005
@@ -17,7 +17,7 @@
+--+
 */
 
-/* $Id: php_string.h,v 1.89 2005/08/16 06:02:55 rolland Exp $ */
+/* $Id: php_string.h,v 1.90 2005/09/28 09:22:08 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */
 
@@ -120,10 +120,13 @@
 PHPAPI UChar *php_u_strtoupper(UChar **s, int32_t *len, const char *locale);
 PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char *locale);
 PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int 
trlen);
+PHPAPI UChar *php_u_addslashes(UChar *str, int32_t length, int32_t 
*new_length, int freeit TSRMLS_DC);
+PHPAPI UChar *php_u_addslashes_ex(UChar *str, int32_t length, int32_t 
*new_length, int freeit, int ignore_sybase TSRMLS_DC);
 PHPAPI char *php_addslashes(char *str, int length, int *new_length, int freeit 
TSRMLS_DC);
 PHPAPI char *php_addslashes_ex(char *str, int length, int *new_length, int 
freeit, int ignore_sybase TSRMLS_DC);
 PHPAPI char *php_addcslashes(char *str, int length, int *new_length, int 
freeit, char *what, int wlength TSRMLS_DC);
 PHPAPI void php_stripslashes(char *str, int *len TSRMLS_DC);
+PHPAPI void php_u_stripslashes(UChar *str, int32_t *len TSRMLS_DC);
 PHPAPI void php_stripcslashes(char *str, int *len);
 PHPAPI void php_basename(char *s, size_t len, char *suffix, size_t sufflen, 
char **p_ret, size_t *p_len TSRMLS_DC);
 PHPAPI size_t php_dirname(char *str, size_t len);
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.483&r2=1.484&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.483 php-src/ext/standard/string.c:1.484
--- php-src/ext/standard/string.c:1.483 Mon Sep 26 19:08:10 2005
+++ php-src/ext/standard/string.c   Wed Sep 28 05:22:08 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.483 2005/09/26 23:08:10 iliaa Exp $ */
+/* $Id: string.c,v 1.484 2005/09/28 09:22:08 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3427,6 +3427,66 @@
 }
 /* }}} */
 
+/* {{{ php_u_stripslashes
+ *
+ * be careful, this edits the string in-place */
+PHPAPI void php_u_stripslashes(UChar *str, int32_t *len TSRMLS_DC)
+{
+   int32_t tmp_len = 0, i = 0;
+   UChar32 ch1, ch2;
+
+   ch1 = -1; ch2 = -1;
+   if (PG(magic_quotes_sybase)) {
+   while (i  *len) {
+   U16_NEXT(str, i, *len, ch1);
+   if (ch1 == '\'') {
+   tmp_len += zend_codepoint_to_uchar(ch1, 
str+tmp_len);
+   if (i  *len) {
+   U16_NEXT(str, i, *len, ch2);
+   if (ch2 != '\'') {
+   tmp_len += 
zend_codepoint_to_uchar(ch2, str+tmp_len);
+   }
+   }
+   } else if (ch1 == '\\') {
+   if (i  *len) {
+   U16_NEXT(str, i, *len, ch2);
+   if (ch2 == '0') {
+   tmp_len += 
zend_codepoint_to_uchar('\0', str+tmp_len);
+   } else {
+   tmp_len += 
zend_codepoint_to_uchar(ch1, str+tmp_len);
+   tmp_len += 
zend_codepoint_to_uchar(ch2, str+tmp_len);
+   }
+   } else {
+   tmp_len += zend_codepoint_to_uchar(ch1, 
str+tmp_len);
+   }
+   } else {
+   tmp_len += zend_codepoint_to_uchar(ch1, 
str+tmp_len);
+   }
+   }
+   } else {
+   while (i  *len) {
+   U16_NEXT(str, i, *len, ch1);
+   if (ch1 == '\\') {
+   if (i  *len) {
+   U16_NEXT(str, i, *len, ch2);
+   if (ch2 == '0') {
+   tmp_len += 
zend_codepoint_to_uchar('\0', str+tmp_len);
+   } else {
+   

[PHP-CVS] cvs: php-src / unicode-progress.txt

2005-09-28 Thread Rolland Santimano
rolland Wed Sep 28 08:25:35 2005 EDT

  Modified files:  
/php-src   unicode-progress.txt 
  Log:
  {add,strip}slashes()
  
  
http://cvs.php.net/diff.php/php-src/unicode-progress.txt?r1=1.8&r2=1.9&ty=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.8 php-src/unicode-progress.txt:1.9
--- php-src/unicode-progress.txt:1.8Fri Sep  9 05:54:16 2005
+++ php-src/unicode-progress.txtWed Sep 28 08:25:34 2005
@@ -6,6 +6,7 @@
   Status: In Progress
 
   Completed:
+addslashes()
 bin2hex()
 chr()
 explode()
@@ -14,6 +15,7 @@
 range()
 str_pad()
 str_repeat()
+stripslashes()
 strpos()
 strrev()
 strstr()

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-28 Thread Rolland Santimano
rolland Wed Sep 28 10:58:37 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Updated substr_replace() to use:
- zend_get_unified_string_type()/convert_to_explicit_type() for type 
conversion
- u_countChar32() for counting codepoints
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.484&r2=1.485&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.484 php-src/ext/standard/string.c:1.485
--- php-src/ext/standard/string.c:1.484 Wed Sep 28 05:22:08 2005
+++ php-src/ext/standard/string.c   Wed Sep 28 10:58:31 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.484 2005/09/28 09:22:08 rolland Exp $ */
+/* $Id: string.c,v 1.485 2005/09/28 14:58:31 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2553,44 +2553,14 @@
 /* }}} */
 
 
-/* {{{ php_unify_string_types
- */
-PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
-{
-   if (p == NULL || q == NULL) {
-   return;
-   }
-
-   if (Z_TYPE_PP(p) == IS_UNICODE) {
-   if (Z_TYPE_PP(q) == IS_BINARY) {
-   convert_to_binary_ex(p);
-   } else {
-   convert_to_unicode_ex(q);
-   }
-   } else if (Z_TYPE_PP(p) == IS_BINARY) {
-   convert_to_binary_ex(q);
-   } else {
-   if (Z_TYPE_PP(q) == IS_BINARY) {
-   convert_to_binary_ex(p);
-   } else {
-   convert_to_string_ex(q);
-   }
-   }
-}
-/* {{{ */
-
 /* {{{ php_adjust_limits
  */
 PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
 {
-   int32_t i, str_codepts;
+   int32_t str_codepts;
 
if (Z_TYPE_PP(str) == IS_UNICODE) {
-   i = 0; str_codepts = 0;
-   while (i  Z_USTRLEN_PP(str)) {
-   U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
-   str_codepts++;
-   }
+   str_codepts = u_countChar32(Z_USTRVAL_PP(str), 
Z_USTRLEN_PP(str));
} else {
str_codepts = Z_STRLEN_PP(str);
}
@@ -2688,7 +2658,7 @@
 
HashPosition pos_str, pos_from, pos_repl, pos_len;
zval **tmp_str = NULL, **tmp_from = NULL, **tmp_repl = NULL, **tmp_len= 
NULL;
-
+   zend_uchar str_type;
 
if (argc  3 || argc  4 || zend_get_parameters_ex(argc, str, repl, 
from, len) == FAILURE) {
WRONG_PARAM_COUNT;
@@ -2747,8 +2717,16 @@
tmp_repl = repl;
}
 
-   if (tmp_repl  Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl))
-   php_unify_string_types(str, tmp_repl TSRMLS_CC);
+   if (tmp_repl  Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) {
+   str_type = zend_get_unified_string_type(2 
TSRMLS_CC, Z_TYPE_PP(str), Z_TYPE_PP(tmp_repl));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_explicit_type(str, 
IS_BINARY);
+   convert_to_explicit_type(tmp_repl, 
IS_BINARY);
+   } else {
+   convert_to_explicit_type(str, str_type);
+   convert_to_explicit_type(tmp_repl, 
str_type);
+   }
+   }
php_adjust_limits(str, f, l);
result_len = php_do_substr_replace(result, str, 
tmp_repl, f, l TSRMLS_CC);
 
@@ -2820,8 +2798,16 @@
tmp_repl = repl;
}
 
-   if (tmp_repl  Z_TYPE_PP(tmp_str) != 
Z_TYPE_PP(tmp_repl))
-   php_unify_string_types(tmp_str, tmp_repl 
TSRMLS_CC);
+   if (tmp_repl  Z_TYPE_PP(tmp_str) != 
Z_TYPE_PP(tmp_repl)) {
+   str_type = zend_get_unified_string_type(2 
TSRMLS_CC, Z_TYPE_PP(tmp_str), Z_TYPE_PP(tmp_repl));
+   if (str_type == (zend_uchar)-1) {
+   convert_to_explicit_type(tmp_str, 
IS_BINARY);
+   convert_to_explicit_type(tmp_repl, 
IS_BINARY);
+   } else {
+   convert_to_explicit_type(tmp_str, 
str_type);
+   convert_to_explicit_type(tmp_repl, 
str_type);
+   }
+   }
php_adjust_limits(tmp_str, f, l);
result_len = php_do_substr_replace(result, tmp_str, 
tmp_repl, f, l TSRMLS_CC);
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php


[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-09 Thread Rolland Santimano
rolland Fri Sep  9 15:07:19 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - str_pad(): Use u_countChar32() for codepoint counting, eumalloc/eurealloc() 
for Unicode mallocs.
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.481&r2=1.482&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.481 php-src/ext/standard/string.c:1.482
--- php-src/ext/standard/string.c:1.481 Thu Sep  8 10:07:40 2005
+++ php-src/ext/standard/string.c   Fri Sep  9 15:07:18 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.481 2005/09/08 14:07:40 derick Exp $ */
+/* $Id: string.c,v 1.482 2005/09/09 19:07:18 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -5394,10 +5394,7 @@
if (input_type == IS_UNICODE) {
/* For Unicode, num_pad_chars/pad_length is number of 
codepoints */
i = 0; input_codepts = 0;
-   while (i  input_len) {
-   U16_FWD_1((UChar *)input, i, input_len);
-   input_codepts++;
-   }
+   input_codepts = u_countChar32((UChar *)input, input_len);
num_pad_chars = pad_length - input_codepts;
} else {
num_pad_chars = pad_length - input_len;
@@ -5439,7 +5436,7 @@
}
 
if (input_type == IS_UNICODE) {
-   result = emalloc(UBYTES(input_len + num_pad_chars*2 + 1));
+   result = eumalloc(input_len + num_pad_chars*2 + 1);
} else {
result = emalloc(input_len + num_pad_chars + 1);
}
@@ -5481,7 +5478,7 @@
result_len += zend_codepoint_to_uchar(ch, (UChar 
*)result + result_len);
}
*((UChar *)result + result_len) = 0;
-   result = erealloc(result, UBYTES(result_len+1));
+   result = eurealloc(result, result_len+1);
} else {
for (i = 0; i  left_pad; i++)
*((char *)result + result_len++) = *((char *)padstr + 
(i % padstr_len));

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-07 Thread Rolland Santimano
rolland Thu Sep  8 01:05:38 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of substr_replace()
  
  http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.478&r2=1.479&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.478 php-src/ext/standard/string.c:1.479
--- php-src/ext/standard/string.c:1.478 Wed Sep  7 03:00:03 2005
+++ php-src/ext/standard/string.c   Thu Sep  8 01:05:36 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.478 2005/09/07 07:00:03 dmitry Exp $ */
+/* $Id: string.c,v 1.479 2005/09/08 05:05:36 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2553,6 +2553,118 @@
 /* }}} */
 
 
+/* {{{ php_unify_string_types
+ */
+PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
+{
+   if (p == NULL || q == NULL) {
+   return;
+   }
+
+   if (Z_TYPE_PP(p) == IS_UNICODE) {
+   if (Z_TYPE_PP(q) == IS_BINARY) {
+   convert_to_binary_ex(p);
+   } else {
+   convert_to_unicode_ex(q);
+   }
+   } else if (Z_TYPE_PP(p) == IS_BINARY) {
+   convert_to_binary_ex(q);
+   } else {
+   if (Z_TYPE_PP(q) == IS_BINARY) {
+   convert_to_binary_ex(p);
+   } else {
+   convert_to_string_ex(q);
+   }
+   }
+}
+/* {{{ */
+
+/* {{{ php_adjust_limits
+ */
+PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
+{
+   int32_t i, str_codepts;
+
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   i = 0; str_codepts = 0;
+   while (i  Z_USTRLEN_PP(str)) {
+   U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
+   str_codepts++;
+   }
+   } else {
+   str_codepts = Z_STRLEN_PP(str);
+   }
+
+   /* If from position is negative, count start position from the end
+* of the string */
+   if (*f  0) {
+   *f = str_codepts + *f;
+   if (*f  0) {
+   *f = 0;
+   }
+   } else if (*f  str_codepts) {
+   *f = str_codepts;
+   }
+   /* If length position is negative, set it to the length
+* needed to stop that many codepts/chars from the end of the string */
+   if (*l  0) {
+   *l = str_codepts - *f + *l;
+   if (*l  0) {
+   *l = 0;
+   }
+   }
+   if (((unsigned)(*f) + (unsigned)(*l))  str_codepts) {
+   *l = str_codepts - *f;
+   }
+}
+/* }}} */
+
+/* {{{ php_do_substr_replace
+ */
+PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, 
int32_t f, int32_t l TSRMLS_DC)
+{
+   void *buf;
+   int32_t buf_len, idx;
+   UChar ch;
+
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   buf = emalloc(UBYTES(Z_USTRLEN_PP(str) -l + Z_USTRLEN_PP(repl) 
+ 1));
+
+   /* buf_len is codept count here */
+   buf_len = 0; idx = 0;
+   while (f--  0) {
+   U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch);
+   buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + 
buf_len);
+   }
+   if (repl != NULL) {
+   u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), 
Z_USTRLEN_PP(repl));
+   buf_len += Z_USTRLEN_PP(repl);
+   }
+   U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l);
+   u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, 
Z_USTRLEN_PP(str) - idx);
+   buf_len += (Z_USTRLEN_PP(str) - idx);
+
+   *((UChar *)buf + buf_len) = 0;
+   buf = erealloc(buf, UBYTES(buf_len + 1));
+   } else {
+   /* buf_len is char count here */
+   buf_len = Z_STRLEN_PP(str) - l + Z_STRLEN_PP(repl);
+   buf = emalloc(buf_len + 1);
+
+   memcpy(buf, Z_STRVAL_PP(str), f);
+   if (repl != NULL ) {
+   memcpy((char *)buf + f, Z_STRVAL_PP(repl), 
Z_STRLEN_PP(repl));
+   }
+   memcpy((char *)buf + f + Z_STRLEN_PP(repl), Z_STRVAL_PP(str) + 
f + l, Z_STRLEN_PP(str) - f - l);
+
+   *((char *)buf + buf_len) = '\0';
+   }
+
+   *result = buf;
+   return buf_len;
+}
+/* }}} */
+
 /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed 
length])
Replaces part of a string with another string */
 PHP_FUNCTION(substr_replace)
@@ -2561,10 +2673,10 @@
zval **from;
zval **len = NULL;
zval **repl;
-   char *result;
-   int result_len;
-   int l = 0;
-   int f;
+   void *result;
+   int32_t result_len;
+   

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-05 Thread Rolland Santimano
rolland Mon Sep  5 06:55:38 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of str_pad()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.475&r2=1.476&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.475 php-src/ext/standard/string.c:1.476
--- php-src/ext/standard/string.c:1.475 Mon Aug 29 02:51:14 2005
+++ php-src/ext/standard/string.c   Mon Sep  5 06:55:35 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.475 2005/08/29 06:51:14 dmitry Exp $ */
+/* $Id: string.c,v 1.476 2005/09/05 10:55:35 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -5301,93 +5301,142 @@
 PHP_FUNCTION(str_pad)
 {
/* Input arguments */
-   zval **input,   /* Input string */
-**pad_length,  /* Length to pad to */
-**pad_string,  /* Padding string */
-**pad_type;/* Padding type 
(left/right/both) */
+   void   *input;  /* Input string */
+   int32_t pad_length; /* Length to pad to, in codepoints for Unicode 
*/
+   void   *padstr; /* Padding string */
+   int32_t pad_type;   /* Padding type (left/right/both) */
+   int32_t input_len, padstr_len; /* Lengths in code units for Unicode */
+   zend_uchar input_type, padstr_type;

/* Helper variables */
-   intnum_pad_chars;   /* Number of padding characters 
(total - input size) */
-   char  *result = NULL;   /* Resulting string */
-   intresult_len = 0;  /* Length of the resulting 
string */
-   char  *pad_str_val =  ;   /* Pointer to padding string */
-   intpad_str_len = 1; /* Length of the padding string */
-   intpad_type_val = STR_PAD_RIGHT; /* The padding type value */
-   inti, left_pad=0, right_pad=0;
+   int32_t input_codepts;  /* Number of codepts in Unicode input */
+   int32_t num_pad_chars;  /* Number of padding characters (total - input 
size) */
+   void   *result = NULL;  /* Resulting string */
+   int32_t result_len = 0; /* Length of the resulting string */
+   int32_t i, j, left_pad=0, right_pad=0;
+   UChar32 ch;
 
 
-   if (ZEND_NUM_ARGS()  2 || ZEND_NUM_ARGS()  4 ||
-   zend_get_parameters_ex(ZEND_NUM_ARGS(), input, pad_length, 
pad_string, pad_type) == FAILURE) {
+   if (ZEND_NUM_ARGS()  2 || ZEND_NUM_ARGS()  4) {
WRONG_PARAM_COUNT;
}
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, Tl|Tl,
+ input, input_len, 
input_type, pad_length,
+ padstr, padstr_len, 
padstr_type, pad_type) == FAILURE) {
+   return;
+   }
 
-   /* Perform initial conversion to expected data types. */
-   convert_to_string_ex(input);
-   convert_to_long_ex(pad_length);
-
-   num_pad_chars = Z_LVAL_PP(pad_length) - Z_STRLEN_PP(input);
-
+   if (input_type == IS_UNICODE) {
+   /* For Unicode, num_pad_chars/pad_length is number of 
codepoints */
+   i = 0; input_codepts = 0;
+   while (i  input_len) {
+   U16_FWD_1((UChar *)input, i, input_len);
+   input_codepts++;
+   }
+   num_pad_chars = pad_length - input_codepts;
+   } else {
+   num_pad_chars = pad_length - input_len;
+   }
/* If resulting string turns out to be shorter than input string,
   we simply copy the input and return. */
if (num_pad_chars  0) {
-   RETURN_ZVAL(*input, 1, 0);
+   if (input_type == IS_UNICODE) {
+   RETURN_UNICODEL((UChar *)input, input_len, 1);
+   } else if (input_type == IS_BINARY) {
+   RETURN_BINARYL((char *)input, input_len, 1);
+   } else {
+   RETURN_STRINGL((char *)input, input_len, 1);
+   }
}
 
-   /* Setup the padding string values if specified. */
+   /* Setup the padding string values if NOT specified. */
if (ZEND_NUM_ARGS()  2) {
-   convert_to_string_ex(pad_string);
-   if (Z_STRLEN_PP(pad_string) == 0) {
+   if (padstr_len == 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, Padding 
string cannot be empty.);
return;
}
-   pad_str_val = Z_STRVAL_PP(pad_string);
-   pad_str_len = Z_STRLEN_PP(pad_string);
-
if (ZEND_NUM_ARGS()  3) {
-   convert_to_long_ex(pad_type);
-   pad_type_val = 

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-09-05 Thread Rolland Santimano
rolland Mon Sep  5 12:37:46 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  Pointer arithmetic with char * rather than void *
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.476&r2=1.477&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.476 php-src/ext/standard/string.c:1.477
--- php-src/ext/standard/string.c:1.476 Mon Sep  5 06:55:35 2005
+++ php-src/ext/standard/string.c   Mon Sep  5 12:37:45 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.476 2005/09/05 10:55:35 rolland Exp $ */
+/* $Id: string.c,v 1.477 2005/09/05 16:37:45 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -5273,20 +5273,20 @@
if (haystack_type == IS_UNICODE) {
while ((p = zend_u_memnstr((UChar *)p, (UChar *)needle, 
needle_len, (UChar *)endp)) != NULL) {
/*(UChar *)p += needle_len; // GCC 4.0.0 cannot compile 
this */
-   p += UBYTES(needle_len);
+   p = (UChar *)p + UBYTES(needle_len);
count++;
}
} else {
if (needle_len == 1) {
cmp = ((char *)needle)[0];
-   while ((p = memchr(p, cmp, endp - p))) {
+   while ((p = memchr(p, cmp, (char *)endp - (char *)p))) {
count++;
-   (char *)p++;
+   p = (char *)p + 1;
}
} else {
while ((p = php_memnstr((char *)p, (char *)needle, 
needle_len, (char *)endp))) {
/*(char *)p += needle_len; // GCC 4.0.0 cannot 
compile this */
-   p += needle_len;
+   p = (char *)p + needle_len;
count++;
}
}
@@ -5420,7 +5420,7 @@
} else {
for (i = 0; i  left_pad; i++)
*((char *)result + result_len++) = *((char *)padstr + 
(i % padstr_len));
-   memcpy(result + result_len, input, input_len);
+   memcpy((char *)result + result_len, input, input_len);
result_len += input_len;
for (i = 0; i  right_pad; i++)
*((char *)result + result_len++) = *((char *)padstr + 
(i % padstr_len));

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-27 Thread Rolland Santimano
rolland Sat Aug 27 15:14:05 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode impl of substr_count()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.473r2=1.474ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.473 php-src/ext/standard/string.c:1.474
--- php-src/ext/standard/string.c:1.473 Fri Aug 26 06:21:07 2005
+++ php-src/ext/standard/string.c   Sat Aug 27 15:14:05 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.473 2005/08/26 10:21:07 rolland Exp $ */
+/* $Id: string.c,v 1.474 2005/08/27 19:14:05 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -5202,62 +5202,91 @@
Returns the number of times a substring occurs in the string */
 PHP_FUNCTION(substr_count)
 {
-   zval **haystack, **needle, **offset, **length;
+   void *haystack, *needle;
+   int32_t haystack_len, needle_len;
+   zend_uchar haystack_type, needle_type;
+   long offset = 0, length = 0;
int ac = ZEND_NUM_ARGS();
int count = 0;
-   char *p, *endp, cmp;
-
-   if (ac  2 || ac  4 || zend_get_parameters_ex(ac, haystack, needle, 
offset, length) == FAILURE) {
-   WRONG_PARAM_COUNT;
+   void *p, *endp, *tmp;
+   int32_t i, j;
+   char cmp;
+
+   if (zend_parse_parameters(ac TSRMLS_CC, TT|ll,
+ haystack, 
haystack_len, haystack_type,
+ needle, needle_len, 
needle_type,
+ offset, length) == 
FAILURE) {
+   return;
}
 
-   convert_to_string_ex(haystack);
-   convert_to_string_ex(needle);
-
-   if (Z_STRLEN_PP(needle) == 0) {
+   if (needle_len == 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, Empty substring.);
RETURN_FALSE;
}
-   
-   p = Z_STRVAL_PP(haystack);
-   endp = p + Z_STRLEN_PP(haystack);
-   
+
+   if (haystack_type == IS_UNICODE) {
+   p = (UChar *)haystack;
+   endp = (UChar *)haystack + haystack_len;
+   } else {
+   p = (char *)haystack;
+   endp = (char *)haystack + haystack_len;
+   }
+
if (ac  2) {
-   convert_to_long_ex(offset);
-   if (Z_LVAL_PP(offset)  0) {
+   if (offset  0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset 
should be greater then or equal to 0.);
-   RETURN_FALSE;   
+   RETURN_FALSE;
+   }
+   if (haystack_type == IS_UNICODE) {
+   i = 0;
+   U16_FWD_N((UChar *)haystack, i, haystack_len, offset);
+   p = (UChar *)haystack + i;
+   } else {
+   p = (char *)haystack + offset;
}
-   p += Z_LVAL_PP(offset);
if (p  endp) {
-   php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset 
value %ld exceeds string length., Z_LVAL_PP(offset));
-   RETURN_FALSE;   
+   php_error_docref(NULL TSRMLS_CC, E_WARNING, Offset 
value %ld exceeds string length., offset);
+   RETURN_FALSE;
}
if (ac == 4) {
-   convert_to_long_ex(length);
-   if (Z_LVAL_PP(length) = 0) {
+   if (length  0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, 
Length should be greater than 0.);
-   RETURN_FALSE;   
+   RETURN_FALSE;
}
-   if ((p + Z_LVAL_PP(length))  endp) {
-   php_error_docref(NULL TSRMLS_CC, E_WARNING, 
Length value %ld exceeds string length., Z_LVAL_PP(length));
+   if (haystack_type == IS_UNICODE) {
+   j = i;
+   i = 0;
+   U16_FWD_N((UChar *)p, i, haystack_len-j, 
length);
+   tmp = (UChar *)p + i;
+   } else {
+   tmp = (char *)p + length;
+   }
+   if (tmp  endp) {
+   php_error_docref(NULL TSRMLS_CC, E_WARNING, 
Offset value %ld exceeds string length., offset);
RETURN_FALSE;
+   } else {
+   endp = tmp;
}
-   endp = p + Z_LVAL_PP(length);
}
}
-   
-   if (Z_STRLEN_PP(needle) == 1) {
-   cmp = 

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-26 Thread Rolland Santimano
rolland Fri Aug 26 06:21:09 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Updated strrev() to handle base+combining sequences
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.472r2=1.473ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.472 php-src/ext/standard/string.c:1.473
--- php-src/ext/standard/string.c:1.472 Tue Aug 23 08:53:28 2005
+++ php-src/ext/standard/string.c   Fri Aug 26 06:21:07 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.472 2005/08/23 12:53:28 dmitry Exp $ */
+/* $Id: string.c,v 1.473 2005/08/26 10:21:07 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3222,7 +3222,9 @@
 {
zval **str;
char *s, *e, *n, *p;
-   UChar *u_s, *u_e, *u_n, *u_p;
+   int32_t i, x1, x2;
+   UChar32 ch;
+   UChar *u_s, *u_n, *u_p;

if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, str) == FAILURE) {
WRONG_PARAM_COUNT;
@@ -3236,15 +3238,22 @@
u_n = eumalloc(Z_USTRLEN_PP(str)+1);
u_p = u_n;
u_s = Z_USTRVAL_PP(str);
-   u_e = u_s + Z_USTRLEN_PP(str) - 1;
 
-   while (u_e = u_s) {
-   if (U16_IS_TRAIL(*u_e)) {
-   *u_p = *(u_e-1);
-   *(u_p+1) = *u_e;
-   u_e -= 2; u_p += 2;
+   i = Z_USTRLEN_PP(str);
+   while (i  0) {
+   U16_PREV(u_s, 0, i, ch);
+   if (u_getCombiningClass(ch) == 0) {
+   u_p += zend_codepoint_to_uchar(ch, u_p);
} else {
-   *u_p++ = *u_e--;
+   x2 = i;
+   do {
+   U16_PREV(u_s, 0, i, ch);
+   } while (u_getCombiningClass(ch) != 0);
+   x1 = i;
+   while (x1 = x2) {
+   U16_NEXT(u_s, x1, Z_USTRLEN_PP(str), 
ch);
+   u_p += zend_codepoint_to_uchar(ch, u_p);
+   }
}
}
*u_p = 0;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-19 Thread Rolland Santimano
rolland Fri Aug 19 06:59:21 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Unicode capable impl() of strrev()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.466r2=1.467ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.466 php-src/ext/standard/string.c:1.467
--- php-src/ext/standard/string.c:1.466 Thu Aug 18 18:37:22 2005
+++ php-src/ext/standard/string.c   Fri Aug 19 06:59:19 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.466 2005/08/18 22:37:22 andrei Exp $ */
+/* $Id: string.c,v 1.467 2005/08/19 10:59:19 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3212,25 +3212,51 @@
 {
zval **str;
char *s, *e, *n, *p;
+   UChar *u_s, *u_e, *u_n, *u_p;

if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, str) == FAILURE) {
WRONG_PARAM_COUNT;
}
-   convert_to_string_ex(str);
-   
-   n = emalloc(Z_STRLEN_PP(str)+1);
-   p = n;
-   
-   s = Z_STRVAL_PP(str);
-   e = s + Z_STRLEN_PP(str);
-   
-   while (--e=s) {
-   *p++ = *e;
+
+   if (Z_TYPE_PP(str) != IS_UNICODE  Z_TYPE_PP(str) != IS_BINARY  
Z_TYPE_PP(str) != IS_STRING) {
+   convert_to_text_ex(str);
+   }
+
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   u_n = eumalloc(Z_USTRLEN_PP(str)+1);
+   u_p = u_n;
+   u_s = Z_USTRVAL_PP(str);
+   u_e = u_s + Z_USTRLEN_PP(str) - 1;
+
+   while (u_e = u_s) {
+   if (U16_IS_TRAIL(*u_e)) {
+   *u_p = *(u_e-1);
+   *(u_p+1) = *u_e;
+   u_e -= 2; u_p += 2;
+   } else {
+   *u_p++ = *u_e--;
+   }
+   }
+   *u_p = 0;
+   } else {
+   n = emalloc(Z_STRLEN_PP(str)+1);
+   p = n;
+   s = Z_STRVAL_PP(str);
+   e = s + Z_STRLEN_PP(str);
+
+   while (--e = s) {
+   *(p++) = *e;
+   }
+   *p = '\0';
}

-   *p = '\0';
-   
-   RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   RETVAL_UNICODEL(u_n, Z_USTRLEN_PP(str), 0);
+   } else if (Z_TYPE_PP(str) == IS_BINARY) {
+   RETVAL_BINARYL(n, Z_BINLEN_PP(str), 0);
+   } else {
+   RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+   }
 }
 /* }}} */
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] Not getting mails from list

2005-08-18 Thread Rolland Santimano
Hello

Last week, I requested subscription to the following lists via
http://www.php.net/mailing-lists.php

php-internals, php-i18n, php-announce, zend-engine-cvs

I'm not receiving any mail from these lists. I've already sent the
confirmation replies, twice. Do I have to do anything else?

I know this is not the right list to raise this, but I didn't know of
any other ID to ping.

Thanks
Rolland

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



RE: [PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-18 Thread Rolland Santimano
--- Dmitry Stogov [EMAIL PROTECTED] wrote:

> Hi Roland,
>
> Please be more careful with your patches.
> Compile PHP with --enable-debug and run "make test" and "make utest"
> before and after patch to see that you don't break something.
>
> Now I fixed all (I hope) implode() bugs that you introduced.
>
> Dmitry.

Thanks a lot for the tips, that really helped.

One question: when/how is the memory allocated by SEPARATE_ZVAL() freed?
Should I be calling any function to do this?

--
Rolland

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-17 Thread Rolland Santimano
rolland Wed Aug 17 03:14:14 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  Updated implode() impl as per Andrei's comments
  [http://news.php.net/php.cvs/33457]
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.457r2=1.458ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.457 php-src/ext/standard/string.c:1.458
--- php-src/ext/standard/string.c:1.457 Tue Aug 16 16:22:33 2005
+++ php-src/ext/standard/string.c   Wed Aug 17 03:14:12 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.457 2005/08/16 20:22:33 helly Exp $ */
+/* $Id: string.c,v 1.458 2005/08/17 07:14:12 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -1144,14 +1144,11 @@
 PHPAPI void php_implode(zval *delim, zval *arr, zval *retval) 
 {
zend_uchar  return_type;
-   int numelems, i;
+   int numelems, i=0;
HashPositionpos;
zval**tmp;
TSRMLS_FETCH();
 
-   if (Z_TYPE_P(delim) != IS_UNICODE  Z_TYPE_P(delim) != IS_BINARY) {
-   convert_to_string_ex(delim);
-   }
Z_TYPE_P(retval) = return_type = Z_TYPE_P(delim); /* ... to start off */
 
/* Setup return value */
@@ -1169,18 +1166,14 @@
}
 
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), pos);
-   for (i = 1 ; i = numelems ; i++) {
-   if (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void 
**)tmp, pos) != SUCCESS) {
-   /* Shouldn't happen ? */
-   return;
-   }
-   zend_hash_move_forward_ex(Z_ARRVAL_P(arr), pos);
+   while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **)tmp, 
pos) == SUCCESS) {
if (Z_TYPE_PP(tmp) != return_type) {
/* Convert to common type, if possible */
if (return_type == IS_UNICODE) {
if (Z_TYPE_PP(tmp) == IS_BINARY) {
/* ERROR */
-   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed string types);
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING,
+
Cannot mix binary strings with other string types);
efree(Z_USTRVAL_P(retval));
ZVAL_FALSE(retval);
return;
@@ -1191,7 +1184,8 @@
} else if (return_type == IS_BINARY) {
if (Z_TYPE_PP(tmp) == IS_UNICODE || 
Z_TYPE_PP(tmp) == IS_STRING) {
/* ERROR */
-   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed string types);
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING,
+
Cannot mix binary strings with other string types);
efree(Z_BINVAL_P(retval));
ZVAL_FALSE(retval);
return;
@@ -1207,7 +1201,8 @@
Z_TYPE_P(retval) = return_type = 
IS_UNICODE;
} else if (Z_TYPE_PP(tmp) == IS_BINARY) {
/* ERROR */
-   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed string types);
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING,
+
Cannot mix binary strings with other string types);
efree(Z_STRVAL_P(retval));
ZVAL_FALSE(retval);
return;
@@ -1222,30 +1217,30 @@
if (return_type == IS_UNICODE) {
Z_USTRVAL_P(retval) = eurealloc(Z_USTRVAL_P(retval),

Z_USTRLEN_P(retval)+Z_USTRLEN_PP(tmp));
-   memcpy(Z_USTRVAL_P(retval)+Z_USTRLEN_P(retval),
-  Z_USTRVAL_PP(tmp), 
Z_USTRLEN_PP(tmp)*sizeof(UChar));
+   memcpy(Z_USTRVAL_P(retval)+Z_USTRLEN_P(retval), 
Z_USTRVAL_PP(tmp),
+  UBYTES(Z_USTRLEN_PP(tmp)));
Z_USTRLEN_P(retval) += Z_USTRLEN_PP(tmp);
-   if (i  numelems) { /* Append delim */
+   if (++i  numelems) { /* Append delim */

[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-17 Thread Rolland Santimano
rolland Wed Aug 17 06:26:03 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  php_u_trim_range(): Alloc UChar32 units rather than UChar
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.459r2=1.460ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.459 php-src/ext/standard/string.c:1.460
--- php-src/ext/standard/string.c:1.459 Wed Aug 17 03:59:28 2005
+++ php-src/ext/standard/string.c   Wed Aug 17 06:26:02 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.459 2005/08/17 07:59:28 dmitry Exp $ */
+/* $Id: string.c,v 1.460 2005/08/17 10:26:02 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -610,7 +610,7 @@
c = input[0];
if ( (input+3  end)  input[1] == '.'  input[2] == '.'  
input[3] = c ) {
tmp_len += (input[3] - c + 1);
-   tmp = (UChar32 *)erealloc(tmp, tmp_len*sizeof(UChar));
+   tmp = (UChar32 *)erealloc(tmp, tmp_len*sizeof(UChar32));
for ( ; c = input[3] ; c++ ) {
if ( U_IS_UNICODE_CHAR(c) ) tmp[idx++] = c;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-17 Thread Rolland Santimano
rolland Wed Aug 17 13:33:21 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  Unicode impl of ucfirst()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.461r2=1.462ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.461 php-src/ext/standard/string.c:1.462
--- php-src/ext/standard/string.c:1.461 Wed Aug 17 07:36:30 2005
+++ php-src/ext/standard/string.c   Wed Aug 17 13:33:19 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.461 2005/08/17 11:36:30 dmitry Exp $ */
+/* $Id: string.c,v 1.462 2005/08/17 17:33:19 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2866,6 +2866,39 @@
 }
 /* }}} */
 
+/* {{{ proto php_u_ucfirst
+   Makes an Unicode string's first character uppercase */
+static void php_u_ucfirst(zval *ustr, zval *return_value)
+{
+   UChar32 lc, uc;
+   UChar tmp[2] = {0, 0}; /* UChar32 will be converted to upto 2 UChar 
units ? */
+   int32_t tmp_len = 2;
+   int32_t pos = 0;
+   UErrorCode err = U_ZERO_ERROR;
+
+   U16_NEXT(Z_USTRVAL_P(ustr), pos, Z_USTRLEN_P(ustr), lc);
+   uc = u_toupper(lc);
+   if ( uc == lc ) {
+   ZVAL_UNICODEL(return_value, Z_USTRVAL_P(ustr), 
Z_USTRLEN_P(ustr), 1);
+   return;
+   }
+
+   u_strFromUTF32(tmp, tmp_len, tmp_len, uc, 1, err);
+   if (U_FAILURE(err)) {
+   ZVAL_EMPTY_UNICODE(return_value);
+   return;
+   }
+
+   Z_USTRVAL_P(return_value) = eumalloc(tmp_len+Z_USTRLEN_P(ustr)-pos+1);
+   Z_USTRVAL_P(return_value)[0] = tmp[0];
+   if (tmp_len  1) {
+   Z_USTRVAL_P(return_value)[1] = tmp[1];
+   }
+   memcpy(Z_USTRVAL_P(return_value)+tmp_len, Z_USTRVAL_P(ustr)+pos, 
UBYTES(Z_USTRLEN_P(ustr)-pos+1));
+   Z_USTRLEN_P(return_value) = tmp_len+Z_USTRLEN_P(ustr)-pos;
+}
+/* }}} */
+
 /* {{{ proto string ucfirst(string str)
Makes a string's first character uppercase */
 PHP_FUNCTION(ucfirst)
@@ -2875,14 +2908,29 @@
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, str) == FAILURE) 
{
WRONG_PARAM_COUNT;
}
-   convert_to_string_ex(str);
 
-   if (!Z_STRLEN_PP(str)) {
+   if (Z_TYPE_PP(str) != IS_UNICODE  Z_TYPE_PP(str) != IS_BINARY  
Z_TYPE_PP(str) != IS_STRING) {
+   convert_to_text_ex(str);
+   }
+
+   if (Z_TYPE_PP(str) == IS_UNICODE  !Z_USTRLEN_PP(str)) {
+   RETURN_EMPTY_UNICODE();
+   } else if (Z_TYPE_PP(str) == IS_BINARY  !Z_BINLEN_PP(str)) {
+   RETURN_EMPTY_BINARY();
+   } else if (!Z_STRLEN_PP(str)) {
RETURN_EMPTY_STRING();
}
 
-   ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
-   *Z_STRVAL_P(return_value) = toupper((unsigned char) 
*Z_STRVAL_P(return_value));
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   Z_TYPE_P(return_value) = IS_UNICODE;
+   php_u_ucfirst(*str, return_value);
+   } else if (Z_TYPE_PP(str) == IS_BINARY) {
+   ZVAL_BINARYL(return_value, Z_BINVAL_PP(str), Z_BINLEN_PP(str), 
1);
+   *Z_BINVAL_P(return_value) = toupper((unsigned char) 
*Z_BINVAL_P(return_value));
+   } else {
+   ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 
1);
+   *Z_STRVAL_P(return_value) = toupper((unsigned char) 
*Z_STRVAL_P(return_value));
+   }
 }
 /* }}} */
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-17 Thread Rolland Santimano
rolland Wed Aug 17 16:29:02 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  Unicode-capable impl of ucwords()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.463r2=1.464ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.463 php-src/ext/standard/string.c:1.464
--- php-src/ext/standard/string.c:1.463 Wed Aug 17 13:39:04 2005
+++ php-src/ext/standard/string.c   Wed Aug 17 16:29:00 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.463 2005/08/17 17:39:04 andrei Exp $ */
+/* $Id: string.c,v 1.464 2005/08/17 20:29:00 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2940,6 +2940,53 @@
 }
 /* }}} */
 
+/* {{{ proto php_u_ucwords()
+   Uppercase the first character of every word in an Unicode string */
+static void php_u_ucwords(zval *ustr, zval *retval)
+{
+   UChar32 *codepts;
+   int32_t len, retval_len;
+   int32_t i;
+   UErrorCode err;
+
+   len = Z_USTRLEN_P(ustr);
+   codepts = (UChar32 *)emalloc((len+1)*sizeof(UChar32));
+   err = U_ZERO_ERROR;
+   u_strToUTF32(codepts, len+1, len, Z_USTRVAL_P(ustr), len, err);
+   if (U_FAILURE(err)) {
+   efree(codepts);
+   ZVAL_EMPTY_UNICODE(retval);
+   return;
+   }
+
+   codepts[0] = u_toupper(codepts[0]);
+   for (i = 1; i  len ; i++) {
+   if (u_isWhitespace(codepts[i-1]) == TRUE) {
+   codepts[i] = u_toupper(codepts[i]);
+   }
+   }
+
+   retval_len = len;
+   Z_USTRVAL_P(retval) = eumalloc(retval_len+1);
+   err = U_ZERO_ERROR;
+   u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, retval_len, codepts, 
len, err);
+   if (U_FAILURE(err) == U_BUFFER_OVERFLOW_ERROR) {
+   err = U_ZERO_ERROR;
+   Z_USTRVAL_P(retval) = eurealloc(Z_USTRVAL_P(retval), 
retval_len+1);
+   u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, NULL, 
codepts, len, err);
+   }
+
+   if (U_SUCCESS(err)) {
+   Z_USTRLEN_P(retval) = retval_len;
+   } else {
+   efree(Z_USTRVAL_P(retval));
+   ZVAL_EMPTY_UNICODE(retval);
+   }
+
+   efree(codepts);
+}
+/* }}} */
+
 /* {{{ proto string ucwords(string str)
Uppercase the first character of every word in a string */
 PHP_FUNCTION(ucwords)
@@ -2950,19 +2997,35 @@
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, str) == FAILURE) 
{
WRONG_PARAM_COUNT;
}
-   convert_to_string_ex(str);
 
-   if (!Z_STRLEN_PP(str)) {
+   if (Z_TYPE_PP(str) != IS_UNICODE  Z_TYPE_PP(str) != IS_BINARY  
Z_TYPE_PP(str) != IS_STRING) {
+   convert_to_text_ex(str);
+   }
+
+   if (Z_TYPE_PP(str) == IS_UNICODE  !Z_USTRLEN_PP(str)) {
+   RETURN_EMPTY_UNICODE();
+   } else if (Z_TYPE_PP(str) == IS_BINARY  !Z_BINLEN_PP(str)) {
+   RETURN_EMPTY_BINARY();
+   } else if (!Z_STRLEN_PP(str)) {
RETURN_EMPTY_STRING();
}
 
-   ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
-   r = Z_STRVAL_P(return_value);
+   if (Z_TYPE_PP(str) == IS_UNICODE) {
+   Z_TYPE_P(return_value) = IS_UNICODE;
+   php_u_ucwords(*str, return_value);
+   } else {
+   if (Z_TYPE_PP(str) == IS_BINARY) {
+   ZVAL_BINARYL(return_value, Z_BINVAL_PP(str), 
Z_BINLEN_PP(str), 1);
+   } else {
+   ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), 
Z_STRLEN_PP(str), 1);
+   }
 
-   *r = toupper((unsigned char) *r);
-   for (r_end = r + Z_STRLEN_P(return_value) - 1; r  r_end; ) {
-   if (isspace((int) *(unsigned char *)r++)) {
-   *r = toupper((unsigned char) *r);
+   r = Z_STRVAL_P(return_value);
+   *r = toupper((unsigned char) *r);
+   for (r_end = r + Z_STRLEN_P(return_value) - 1; r  r_end; ) {
+   if (isspace((int) *(unsigned char *)r++)) {
+   *r = toupper((unsigned char) *r);
+   }
}
}
 }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-17 Thread Rolland Santimano
rolland Thu Aug 18 01:53:48 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  - Removed 'proto' from description of internal funcns: ucfirst()/ucwords()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.464r2=1.465ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.464 php-src/ext/standard/string.c:1.465
--- php-src/ext/standard/string.c:1.464 Wed Aug 17 16:29:00 2005
+++ php-src/ext/standard/string.c   Thu Aug 18 01:53:46 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.464 2005/08/17 20:29:00 rolland Exp $ */
+/* $Id: string.c,v 1.465 2005/08/18 05:53:46 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2872,7 +2872,7 @@
 }
 /* }}} */
 
-/* {{{ proto php_u_ucfirst
+/* {{{ php_u_ucfirst()
Makes an Unicode string's first character uppercase */
 static void php_u_ucfirst(zval *ustr, zval *return_value)
 {
@@ -2940,7 +2940,7 @@
 }
 /* }}} */
 
-/* {{{ proto php_u_ucwords()
+/* {{{ php_u_ucwords()
Uppercase the first character of every word in an Unicode string */
 static void php_u_ucwords(zval *ustr, zval *retval)
 {

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard php_string.h string.c

2005-08-16 Thread Rolland Santimano
rolland Tue Aug 16 02:02:57 2005 EDT

  Modified files:  
/php-src/ext/standard   php_string.h string.c 
  Log:
  php_trim() takes extra arg to determine string type to be returned
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/php_string.h?r1=1.88r2=1.89ty=u
Index: php-src/ext/standard/php_string.h
diff -u php-src/ext/standard/php_string.h:1.88 
php-src/ext/standard/php_string.h:1.89
--- php-src/ext/standard/php_string.h:1.88  Thu Aug 11 19:35:59 2005
+++ php-src/ext/standard/php_string.h   Tue Aug 16 02:02:55 2005
@@ -17,7 +17,7 @@
+--+
 */
 
-/* $Id: php_string.h,v 1.88 2005/08/11 23:35:59 andrei Exp $ */
+/* $Id: php_string.h,v 1.89 2005/08/16 06:02:55 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */
 
@@ -132,7 +132,7 @@
int needle_len, char *str, int str_len, int *_new_length, int 
case_sensitivity, int *replace_count);
 PHPAPI char *php_str_to_str(char *haystack, int length, char *needle,
int needle_len, char *str, int str_len, int *_new_length);
-PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zval 
*return_value, int mode TSRMLS_DC);
+PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zend_uchar 
str_type, zval *return_value, int mode TSRMLS_DC);
 PHPAPI size_t php_strip_tags(char *rbuf, int len, int *state, char *allow, int 
allow_len);
 PHPAPI int php_char_to_str_ex(char *str, uint len, char from, char *to, int 
to_len, pval *result, int case_sensitivity, int *replace_count);
 PHPAPI int php_char_to_str(char *str, uint len, char from, char *to, int 
to_len, pval *result);
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.453r2=1.454ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.453 php-src/ext/standard/string.c:1.454
--- php-src/ext/standard/string.c:1.453 Mon Aug 15 15:12:59 2005
+++ php-src/ext/standard/string.c   Tue Aug 16 02:02:56 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.453 2005/08/15 19:12:59 johannes Exp $ */
+/* $Id: string.c,v 1.454 2005/08/16 06:02:56 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -538,7 +538,7 @@
  * mode 3 : trim left and right
  * what indicates which chars are to be trimmed. NULL-default (' \t\n\r\v\0')
  */
-PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zval 
*return_value, int mode TSRMLS_DC)
+PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zend_uchar 
str_type, zval *return_value, int mode TSRMLS_DC)
 {
register int i;
int trimmed = 0;
@@ -572,7 +572,11 @@
}
 
if (return_value) {
-   RETVAL_STRINGL(c, len, 1);
+   if ( str_type == IS_BINARY ) {
+   RETVAL_BINARYL(c, len, 1);
+   } else {
+   RETVAL_STRINGL(c, len, 1);
+   }
} else {
return estrndup(c, len);
}
@@ -750,13 +754,13 @@
if ( str_type == IS_UNICODE ) {
php_u_trim(str, str_len, what, what_len, return_value, 
mode TSRMLS_CC);
} else {
-   php_trim(str, str_len, what, what_len, return_value, 
mode TSRMLS_CC);
+   php_trim(str, str_len, what, what_len, str_type, 
return_value, mode TSRMLS_CC);
}
} else {
if ( str_type == IS_UNICODE ) {
php_u_trim(str, str_len, NULL, 0, return_value, mode 
TSRMLS_CC);
} else {
-   php_trim(str, str_len, NULL, 0, return_value, mode 
TSRMLS_CC);
+   php_trim(str, str_len, NULL, 0, str_type, return_value, 
mode TSRMLS_CC);
}
}
 }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/simplexml simplexml.c /ext/standard http_fopen_wrapper.c

2005-08-16 Thread Rolland Santimano
rolland Tue Aug 16 02:05:00 2005 EDT

  Modified files:  
/php-src/ext/simplexml  simplexml.c 
/php-src/ext/standard   http_fopen_wrapper.c 
  Log:
  Updated calls to php_trim() to provide arg for returned string type
  
  
http://cvs.php.net/diff.php/php-src/ext/simplexml/simplexml.c?r1=1.154r2=1.155ty=u
Index: php-src/ext/simplexml/simplexml.c
diff -u php-src/ext/simplexml/simplexml.c:1.154 
php-src/ext/simplexml/simplexml.c:1.155
--- php-src/ext/simplexml/simplexml.c:1.154 Fri Aug 12 10:08:25 2005
+++ php-src/ext/simplexml/simplexml.c   Tue Aug 16 02:04:58 2005
@@ -18,7 +18,7 @@
   +--+
 */
 
-/* $Id: simplexml.c,v 1.154 2005/08/12 14:08:25 sebastian Exp $ */
+/* $Id: simplexml.c,v 1.155 2005/08/16 06:04:58 rolland Exp $ */
 
 #ifdef HAVE_CONFIG_H
 #include config.h
@@ -337,7 +337,7 @@
trim_zv = *member;
zval_copy_ctor(trim_zv);
convert_to_string(trim_zv);
-   php_trim(Z_STRVAL(trim_zv), Z_STRLEN(trim_zv), NULL, 0, 
tmp_zv, 3 TSRMLS_CC);
+   php_trim(Z_STRVAL(trim_zv), Z_STRLEN(trim_zv), NULL, 0, 
IS_STRING, tmp_zv, 3 TSRMLS_CC);
zval_dtor(trim_zv);
member = tmp_zv;
}
@@ -1736,7 +1736,7 @@
 {
php_info_print_table_start();
php_info_print_table_header(2, Simplexml support, enabled);
-   php_info_print_table_row(2, Revision, $Revision: 1.154 $);
+   php_info_print_table_row(2, Revision, $Revision: 1.155 $);
php_info_print_table_row(2, Schema support,
 #ifdef LIBXML_SCHEMAS_ENABLED
enabled);
http://cvs.php.net/diff.php/php-src/ext/standard/http_fopen_wrapper.c?r1=1.99r2=1.100ty=u
Index: php-src/ext/standard/http_fopen_wrapper.c
diff -u php-src/ext/standard/http_fopen_wrapper.c:1.99 
php-src/ext/standard/http_fopen_wrapper.c:1.100
--- php-src/ext/standard/http_fopen_wrapper.c:1.99  Wed Aug  3 10:08:04 2005
+++ php-src/ext/standard/http_fopen_wrapper.c   Tue Aug 16 02:04:59 2005
@@ -19,7 +19,7 @@
|  Sara Golemon [EMAIL PROTECTED]  |
+--+
  */
-/* $Id: http_fopen_wrapper.c,v 1.99 2005/08/03 14:08:04 sniper Exp $ */ 
+/* $Id: http_fopen_wrapper.c,v 1.100 2005/08/16 06:04:59 rolland Exp $ */ 
 
 #include php.h
 #include php_globals.h
@@ -265,7 +265,7 @@
Z_STRLEN_PP(tmpzval)) {
/* Remove newlines and spaces from start and end,
   php_trim will estrndup() */
-   tmp = php_trim(Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), 
NULL, 0, NULL, 3 TSRMLS_CC);
+   tmp = php_trim(Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), 
NULL, 0, IS_STRING, NULL, 3 TSRMLS_CC);
if (strlen(tmp)  0) {
if (!header_init) { /* Remove post headers for 
redirects */
int l = strlen(tmp);
@@ -291,7 +291,7 @@
}
}
efree(tmp_c);
-   tmp_c = php_trim(tmp, strlen(tmp), NULL, 0, 
NULL, 3 TSRMLS_CC);
+   tmp_c = php_trim(tmp, strlen(tmp), NULL, 0, 
IS_STRING, NULL, 3 TSRMLS_CC);
efree(tmp);
tmp = tmp_c;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/standard string.c

2005-08-16 Thread Rolland Santimano
rolland Tue Aug 16 08:04:14 2005 EDT

  Modified files:  
/php-src/ext/standard   string.c 
  Log:
  Unicode capable impl of implode()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.454r2=1.455ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.454 php-src/ext/standard/string.c:1.455
--- php-src/ext/standard/string.c:1.454 Tue Aug 16 02:02:56 2005
+++ php-src/ext/standard/string.c   Tue Aug 16 08:04:13 2005
@@ -18,7 +18,7 @@
+--+
  */
 
-/* $Id: string.c,v 1.454 2005/08/16 06:02:56 rolland Exp $ */
+/* $Id: string.c,v 1.455 2005/08/16 12:04:13 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -1141,36 +1141,115 @@
 
 /* {{{ php_implode
  */
-PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value) 
+PHPAPI void php_implode(zval *delim, zval *arr, zval *retval) 
 {
-   zval **tmp;
-   HashPosition   pos;
-   smart_str  implstr = {0};
-   intnumelems, i = 0;
+   zend_uchar  return_type;
+   int numelems, i;
+   HashPositionpos;
+   zval**tmp;
+   void*elem;
+   int32_t elem_chars, elem_len;
+
+   if (Z_TYPE_P(delim) != IS_UNICODE  Z_TYPE_P(delim) != IS_BINARY) {
+   convert_to_string_ex(delim);
+   }
+   Z_TYPE_P(retval) = return_type = Z_TYPE_P(delim); /* ... to start off */
+
+   /* Setup return value */
+   if (return_type == IS_UNICODE) {
+   ZVAL_EMPTY_UNICODE(retval);
+   } else if (return_type == IS_BINARY) {
+   ZVAL_EMPTY_BINARY(retval);
+   } else {
+   ZVAL_EMPTY_STRING(retval);
+   }
 
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr));
-
if (numelems == 0) {
-   RETURN_EMPTY_STRING();
+   return;
}
 
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), pos);
-
-   while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) tmp, 
pos) == SUCCESS) {
-   if ((*tmp)-type != IS_STRING) {
-   SEPARATE_ZVAL(tmp);
-   convert_to_string(*tmp);
-   } 
-   
-   smart_str_appendl(implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp));
-   if (++i != numelems) {
-   smart_str_appendl(implstr, Z_STRVAL_P(delim), 
Z_STRLEN_P(delim));
+   for (i = 1 ; i = numelems ; i++) {
+   if (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void 
**)tmp, pos) != SUCCESS) {
+   /* Shouldn't happen ? */
+   return;
}
zend_hash_move_forward_ex(Z_ARRVAL_P(arr), pos);
+   if (Z_TYPE_PP(tmp) != return_type) {
+   /* Convert to common type, if possible */
+   if (return_type == IS_UNICODE) {
+   if (Z_TYPE_PP(tmp) == IS_BINARY) {
+   /* ERROR */
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed string types);
+   efree(Z_USTRVAL_P(retval));
+   ZVAL_FALSE(retval);
+   return;
+   } else {
+   SEPARATE_ZVAL(tmp);
+   convert_to_unicode_ex(tmp);
+   }
+   } else if (return_type == IS_BINARY) {
+   if (Z_TYPE_PP(tmp) == IS_UNICODE || 
Z_TYPE_PP(tmp) == IS_STRING) {
+   /* ERROR */
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed string types);
+   efree(Z_BINVAL_P(retval));
+   ZVAL_FALSE(retval);
+   return;
+   } else {
+   SEPARATE_ZVAL(tmp);
+   convert_to_binary_ex(tmp);
+   }
+   } else {
+   if (Z_TYPE_PP(tmp) == IS_UNICODE) {
+   /* Convert IS_STRING up to IS_UNICODE */
+   convert_to_unicode_ex(retval);
+   convert_to_unicode_ex(delim);
+   Z_TYPE_P(retval) = return_type = 
IS_UNICODE;
+   } else if (Z_TYPE_PP(tmp) == IS_BINARY) {
+   /* ERROR */
+   php_error_docref(NULL TSRMLS_CC, 
E_WARNING, Mixed