derick Thu Nov 16 20:33:44 2006 UTC
Modified files:
/php-src/ext/standard string.c
/php-src/ext/standard/tests/strings strtr2.phpt
Log:
- Add the strtr implementation for unicode for the case where there are
characters with a code point value >= 256.
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.610&r2=1.611&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.610 php-src/ext/standard/string.c:1.611
--- php-src/ext/standard/string.c:1.610 Fri Nov 10 12:01:50 2006
+++ php-src/ext/standard/string.c Thu Nov 16 20:33:44 2006
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.610 2006/11/10 12:01:50 andrei Exp $ */
+/* $Id: string.c,v 1.611 2006/11/16 20:33:44 derick Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -3805,6 +3805,43 @@
/* {{{ php_u_strtr
*/
+static void text_iter_helper_move(UChar *text, int32_t text_len, int32_t
*offset, int32_t *cp_offset)
+{
+ UChar32 cp;
+ int32_t tmp, tmp2;
+
+ if (*offset == UBRK_DONE) {
+ return;
+ }
+
+ if (*offset == text_len) {
+ *offset = UBRK_DONE;
+ *cp_offset = UBRK_DONE;
+ } else {
+ U16_NEXT(text, (*offset), text_len, cp);
+ (*cp_offset)++;
+
+ if (u_getCombiningClass(cp) == 0) {
+ tmp = *offset;
+ tmp2 = *cp_offset;
+ /*
+ * At the end of the "from" string cp will be 0 because of the
+ * terminating NULL, so the combining class will be 0 as well.
+ */
+ while (tmp < text_len) {
+ U16_NEXT(text, tmp, text_len, cp);
+ tmp2++;
+ if (u_getCombiningClass(cp) == 0) {
+ break;
+ } else {
+ *offset = tmp;
+ *cp_offset = tmp2;
+ }
+ }
+ }
+ }
+}
+
PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int
str_from_len, UChar *str_to, int str_to_len, int trlen, int *outlen TSRMLS_DC)
{
int i;
@@ -3861,19 +3898,43 @@
* array in such a way that we can reuse the code in
php_u_strtr_array
* to do the replacements in order to avoid duplicating code. */
HashTable *tmp_hash;
- int minlen = 128*1024, maxlen;
- zval *tmp;
- UChar x[2] = { 0, };
+ int minlen = 128*1024, maxlen = 0;
+ int32_t prev_from_offset = 0, from_offset = 0, from_cp_offset =
0;
+ int32_t prev_to_offset = 0, to_offset = 0, to_cp_offset = 0;
+ zval *entry;
+ UChar *key_string;
tmp_hash = emalloc(sizeof(HashTable));
- zend_hash_init(tmp_hash, 0, NULL, NULL, 0);
+ zend_hash_init(tmp_hash, 0, ZVAL_PTR_DTOR, NULL, 0);
/* Loop over the two strings and prepare the hash entries */
- MAKE_STD_ZVAL(tmp);
- x[0] = (UChar) 0x58 /*'X'*/;
- ZVAL_UNICODEL(tmp, x, 1, 1);
- minlen = maxlen = 1;
- zend_u_hash_add(tmp_hash, IS_UNICODE, ZSTR("a"), 2, &tmp,
sizeof(zval *), NULL);
+ do
+ {
+ text_iter_helper_move(str_from, str_from_len,
&from_offset, &from_cp_offset);
+ text_iter_helper_move(str_to, str_to_len, &to_offset,
&to_cp_offset);
+
+ if (from_offset != -1 && to_offset != -1) {
+ if (from_cp_offset - prev_from_offset > maxlen)
{
+ maxlen = from_cp_offset -
prev_from_offset;
+ }
+ if (from_cp_offset - prev_from_offset < minlen)
{
+ minlen = from_cp_offset -
prev_from_offset;
+ }
+
+ MAKE_STD_ZVAL(entry);
+ ZVAL_UNICODEL(entry, str_to + prev_to_offset,
to_cp_offset - prev_to_offset, 0);
+
+ key_string = eumalloc(from_cp_offset -
prev_from_offset + 1);
+ memcpy(key_string, str_from + prev_from_offset,
UBYTES(from_cp_offset - prev_from_offset));
+ key_string[from_cp_offset - prev_from_offset] =
0;
+
+ zend_u_hash_add(tmp_hash, IS_UNICODE,
ZSTR(key_string), from_cp_offset - prev_from_offset + 1, &entry, sizeof(zval*),
NULL);
+ efree(key_string);
+ }
+
+ prev_from_offset = from_offset;
+ prev_to_offset = to_offset;
+ } while (from_offset != -1 && to_offset != -1);
/* Run the replacement */
str = php_u_strtr_array(str, len, tmp_hash, minlen, maxlen,
outlen TSRMLS_CC);
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/strtr2.phpt?r1=1.3&r2=1.4&diff_format=u
Index: php-src/ext/standard/tests/strings/strtr2.phpt
diff -u php-src/ext/standard/tests/strings/strtr2.phpt:1.3
php-src/ext/standard/tests/strings/strtr2.phpt:1.4
--- php-src/ext/standard/tests/strings/strtr2.phpt:1.3 Wed Jun 28 15:07:14 2006
+++ php-src/ext/standard/tests/strings/strtr2.phpt Thu Nov 16 20:33:44 2006
@@ -28,7 +28,7 @@
// Test with combining chars 2
$from = "aÌb";
$to = "12";
- $string = "xyzabc";
+ $string = "xyzaÌbc";
var_dump( strtr( $string, $from, $to ) );
// Test with combining chars 3
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php