rolland Thu Sep 8 01:05:38 2005 EDT
Modified files:
/php-src/ext/standard string.c
Log:
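- Unicode implementation of substr_replace(); adds the helpers php_unify_string_types(), php_adjust_limits() and php_do_substr_replace()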
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.478&r2=1.479&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.478 php-src/ext/standard/string.c:1.479
--- php-src/ext/standard/string.c:1.478 Wed Sep 7 03:00:03 2005
+++ php-src/ext/standard/string.c Thu Sep 8 01:05:36 2005
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.478 2005/09/07 07:00:03 dmitry Exp $ */
+/* $Id: string.c,v 1.479 2005/09/08 05:05:36 rolland Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -2553,6 +2553,118 @@
/* }}} */
+/* {{{ php_unify_string_types
+ * Converts *p and/or *q so both hold the same string type; if either is IS_BINARY the other becomes binary. */
+PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
+{
+ if (p == NULL || q == NULL) { /* nothing to unify unless both operands are supplied */
+ return;
+ }
+
+ if (Z_TYPE_PP(p) == IS_UNICODE) {
+ if (Z_TYPE_PP(q) == IS_BINARY) {
+ convert_to_binary_ex(p); /* binary operand wins: p is converted, q is left as is */
+ } else {
+ convert_to_unicode_ex(q); /* any non-binary q is brought up to Unicode */
+ }
+ } else if (Z_TYPE_PP(p) == IS_BINARY) {
+ convert_to_binary_ex(q);
+ } else { /* p is neither IS_UNICODE nor IS_BINARY */
+ if (Z_TYPE_PP(q) == IS_BINARY) {
+ convert_to_binary_ex(p);
+ } else {
+ convert_to_string_ex(q); /* only q is converted here; p itself is not touched */
+ }
+ }
+}
+/* }}} */
+
+/* {{{ php_adjust_limits
+ * Normalizes *f (start offset) and *l (length) in place so that both lie within the bounds of str. */
+PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
+{
+ int32_t i, str_codepts;
+
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ i = 0; str_codepts = 0;
+ while (i < Z_USTRLEN_PP(str)) { /* count one unit per U16_FWD_1 step through the UTF-16 buffer */
+ U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
+ str_codepts++;
+ }
+ } else {
+ str_codepts = Z_STRLEN_PP(str); /* non-Unicode strings are measured by Z_STRLEN_PP */
+ }
+
+ /* If "from" position is negative, count start position from the end
+ * of the string */
+ if (*f < 0) {
+ *f = str_codepts + *f;
+ if (*f < 0) {
+ *f = 0;
+ }
+ } else if (*f > str_codepts) { /* start beyond the end is pinned to the end */
+ *f = str_codepts;
+ }
+ /* If "length" position is negative, set it to the length
+ * needed to stop that many codepts/chars from the end of the string */
+ if (*l < 0) {
+ *l = str_codepts - *f + *l;
+ if (*l < 0) {
+ *l = 0;
+ }
+ }
+ if (((unsigned)(*f) + (unsigned)(*l)) > str_codepts) { /* shrink length so f + l never runs past the end */
+ *l = str_codepts - *f;
+ }
+}
+/* }}} */
+
+/* {{{ php_do_substr_replace
+ * Stores in *result a new buffer: str with l units from offset f replaced by repl (NULL = nothing); returns its length. */
+PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC)
+{
+	void *buf;
+	int32_t buf_len, idx, repl_len;
+	UChar32 ch; /* U16_NEXT yields a whole code point; a 16-bit UChar would truncate supplementary characters */
+
+	if (Z_TYPE_PP(str) == IS_UNICODE) {
+		repl_len = (repl != NULL) ? Z_USTRLEN_PP(repl) : 0; /* never dereference a missing replacement */
+		buf = emalloc(UBYTES(Z_USTRLEN_PP(str) - l + repl_len + 1));
+		/* f and l are code points; buf_len and idx count UTF-16 code units */
+		buf_len = 0; idx = 0;
+		while (f-- > 0) {
+			U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch);
+			buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + buf_len);
+		}
+		if (repl != NULL) {
+			u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), repl_len);
+			buf_len += repl_len;
+		}
+		U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l); /* skip the replaced code points */
+		u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, Z_USTRLEN_PP(str) - idx);
+		buf_len += (Z_USTRLEN_PP(str) - idx);
+
+		*((UChar *)buf + buf_len) = 0;
+		buf = erealloc(buf, UBYTES(buf_len + 1)); /* resize to the exact number of code units written */
+	} else {
+		repl_len = (repl != NULL) ? Z_STRLEN_PP(repl) : 0; /* never dereference a missing replacement */
+		/* buf_len is char count here */
+		buf_len = Z_STRLEN_PP(str) - l + repl_len;
+		buf = emalloc(buf_len + 1);
+
+		memcpy(buf, Z_STRVAL_PP(str), f);
+		if (repl != NULL) {
+			memcpy((char *)buf + f, Z_STRVAL_PP(repl), repl_len);
+		}
+		memcpy((char *)buf + f + repl_len, Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l);
+		*((char *)buf + buf_len) = '\0';
+	}
+
+	*result = buf;
+	return buf_len;
+}
+/* }}} */
+
/* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed
length])
Replaces part of a string with another string */
PHP_FUNCTION(substr_replace)
@@ -2561,10 +2673,10 @@
zval **from;
zval **len = NULL;
zval **repl;
- char *result;
- int result_len;
- int l = 0;
- int f;
+ void *result;
+ int32_t result_len;
+ int32_t l = 0;
+ int32_t f;
int argc = ZEND_NUM_ARGS();
HashPosition pos_str, pos_from, pos_repl, pos_len;
@@ -2575,16 +2687,18 @@
WRONG_PARAM_COUNT;
}
- if (Z_TYPE_PP(str) != IS_ARRAY) {
- convert_to_string_ex(str);
+ if (Z_TYPE_PP(str) != IS_ARRAY && Z_TYPE_PP(str) != IS_UNICODE &&
+ Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) {
+ convert_to_text_ex(str);
}
- if (Z_TYPE_PP(repl) != IS_ARRAY) {
- convert_to_string_ex(repl);
+ if (Z_TYPE_PP(repl) != IS_ARRAY && Z_TYPE_PP(repl) != IS_UNICODE &&
+ Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+ convert_to_text_ex(repl);
}
if (Z_TYPE_PP(from) != IS_ARRAY) {
convert_to_long_ex(from);
+ f = Z_LVAL_PP(from);
}
-
if (argc > 3) {
if (Z_TYPE_PP(len) != IS_ARRAY) {
convert_to_long_ex(len);
@@ -2592,80 +2706,59 @@
}
} else {
if (Z_TYPE_PP(str) != IS_ARRAY) {
- l = Z_STRLEN_PP(str);
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ l = Z_USTRLEN_PP(str);
+ } else {
+ l = Z_STRLEN_PP(str);
+ }
}
}
- if (Z_TYPE_PP(str) == IS_STRING) {
- if (
- (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY)
- ||
- (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len))
- ) {
+ if (Z_TYPE_PP(str) != IS_ARRAY) {
+ if ( (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) ||
+ (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) ) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and
'len' should be of same type - numerical or array ");
- RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
+ RETURN_ZVAL(*str, 1, 0);
}
if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) {
if (zend_hash_num_elements(Z_ARRVAL_PP(from)) !=
zend_hash_num_elements(Z_ARRVAL_PP(len))) {
php_error_docref(NULL TSRMLS_CC, E_WARNING,
"'from' and 'len' should have the same number of elements");
- RETURN_STRINGL(Z_STRVAL_PP(str),
Z_STRLEN_PP(str), 1);
+ RETURN_ZVAL(*str, 1, 0);
}
}
}
if (Z_TYPE_PP(str) != IS_ARRAY) {
- if (Z_TYPE_PP(from) != IS_ARRAY) {
- int repl_len = 0;
-
- f = Z_LVAL_PP(from);
-
- /* if "from" position is negative, count start position
from the end
- * of the string
- */
- if (f < 0) {
- f = Z_STRLEN_PP(str) + f;
- if (f < 0) {
- f = 0;
- }
- } else if (f > Z_STRLEN_PP(str)) {
- f = Z_STRLEN_PP(str);
- }
- /* if "length" position is negative, set it to the
length
- * needed to stop that many chars from the end of the
string
- */
- if (l < 0) {
- l = (Z_STRLEN_PP(str) - f) + l;
- if (l < 0) {
- l = 0;
- }
- }
-
- if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) {
- l = Z_STRLEN_PP(str) - f;
- }
+ if (Z_TYPE_PP(from) != IS_ARRAY ) {
if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
if (SUCCESS ==
zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl,
&pos_repl)) {
- convert_to_string_ex(tmp_repl);
- repl_len = Z_STRLEN_PP(tmp_repl);
+ if (Z_TYPE_PP(repl) != IS_UNICODE &&
Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+ convert_to_text_ex(tmp_repl);
+ }
+ } else {
+ tmp_repl = NULL;
}
} else {
- repl_len = Z_STRLEN_PP(repl);
+ tmp_repl = repl;
}
- result_len = Z_STRLEN_PP(str) - l + repl_len;
- result = emalloc(result_len + 1);
- memcpy(result, Z_STRVAL_PP(str), f);
- if (repl_len) {
- memcpy((result + f), (Z_TYPE_PP(repl) ==
IS_ARRAY ? Z_STRVAL_PP(tmp_repl) : Z_STRVAL_PP(repl)), repl_len);
- }
- memcpy((result + f + repl_len), Z_STRVAL_PP(str) + f +
l, Z_STRLEN_PP(str) - f - l);
- result[result_len] = '\0';
- RETURN_STRINGL(result, result_len, 0);
+ if (Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl))
+ php_unify_string_types(str, tmp_repl TSRMLS_CC);
+ php_adjust_limits(str, &f, &l);
+ result_len = php_do_substr_replace(&result, str,
tmp_repl, f, l TSRMLS_CC);
+
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ RETURN_UNICODEL((UChar *)result, result_len, 0);
+ } else if (Z_TYPE_PP(str) == IS_BINARY) {
+ RETURN_BINARYL((char *)result, result_len, 0);
+ } else {
+ RETURN_STRINGL((char *)result, result_len, 0);
+ }
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING,
"Functionality of 'from' and 'len' as arrays is not implemented.");
- RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
+ RETURN_ZVAL(*str, 1, 0);
}
} else { /* str is array of strings */
array_init(return_value);
@@ -2673,104 +2766,76 @@
if (Z_TYPE_PP(from) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from),
&pos_from);
}
-
if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len),
&pos_len);
}
-
if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl),
&pos_repl);
}
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str);
while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void
**) &tmp_str, &pos_str) == SUCCESS) {
- convert_to_string_ex(tmp_str);
+ if (Z_TYPE_PP(tmp_str) != IS_UNICODE &&
Z_TYPE_PP(tmp_str) != IS_BINARY && Z_TYPE_PP(tmp_str) != IS_STRING) {
+ convert_to_text_ex(tmp_str);
+ }
if (Z_TYPE_PP(from) == IS_ARRAY) {
if (SUCCESS ==
zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from,
&pos_from)) {
convert_to_long_ex(tmp_from);
-
f = Z_LVAL_PP(tmp_from);
- if (f < 0) {
- f = Z_STRLEN_PP(tmp_str) + f;
- if (f < 0) {
- f = 0;
- }
- } else if (f > Z_STRLEN_PP(tmp_str)) {
- f = Z_STRLEN_PP(tmp_str);
- }
zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from);
} else {
f = 0;
}
- } else {
- f = Z_LVAL_PP(from);
- if (f < 0) {
- f = Z_STRLEN_PP(tmp_str) + f;
- if (f < 0) {
- f = 0;
- }
- } else if (f > Z_STRLEN_PP(tmp_str)) {
- f = Z_STRLEN_PP(tmp_str);
- }
}
- if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
+ if (argc > 3 && (Z_TYPE_PP(len) == IS_ARRAY)) {
if (SUCCESS ==
zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) {
convert_to_long_ex(tmp_len);
-
l = Z_LVAL_PP(tmp_len);
zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len);
} else {
- l = Z_STRLEN_PP(tmp_str);
+ if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+ l = Z_USTRLEN_PP(tmp_str);
+ } else {
+ l = Z_STRLEN_PP(tmp_str);
+ }
}
- } else if (argc > 3) {
- l = Z_LVAL_PP(len);
+ } else if (argc > 3) {
+ /* 'l' parsed & set at top of funcn */
} else {
- l = Z_STRLEN_PP(tmp_str);
- }
-
- if (l < 0) {
- l = (Z_STRLEN_PP(tmp_str) - f) + l;
- if (l < 0) {
- l = 0;
+ if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+ l = Z_USTRLEN_PP(tmp_str);
+ } else {
+ l = Z_STRLEN_PP(tmp_str);
}
}
- if (((unsigned) f + (unsigned) l) >
Z_STRLEN_PP(tmp_str)) {
- l = Z_STRLEN_PP(tmp_str) - f;
- }
-
- result_len = Z_STRLEN_PP(tmp_str) - l;
-
if (Z_TYPE_PP(repl) == IS_ARRAY) {
if (SUCCESS ==
zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl,
&pos_repl)) {
- convert_to_string_ex(tmp_repl);
- result_len += Z_STRLEN_PP(tmp_repl);
+ if (Z_TYPE_PP(repl) != IS_UNICODE &&
Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+ convert_to_text_ex(tmp_repl);
+ }
zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl);
- result = emalloc(result_len + 1);
-
- memcpy(result, Z_STRVAL_PP(tmp_str), f);
- memcpy((result + f),
Z_STRVAL_PP(tmp_repl), Z_STRLEN_PP(tmp_repl));
- memcpy((result + f +
Z_STRLEN_PP(tmp_repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f
- l);
} else {
- result = emalloc(result_len + 1);
-
- memcpy(result, Z_STRVAL_PP(tmp_str), f);
- memcpy((result + f),
Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
+ tmp_repl = NULL;
}
} else {
- result_len += Z_STRLEN_PP(repl);
-
- result = emalloc(result_len + 1);
-
- memcpy(result, Z_STRVAL_PP(tmp_str), f);
- memcpy((result + f), Z_STRVAL_PP(repl),
Z_STRLEN_PP(repl));
- memcpy((result + f + Z_STRLEN_PP(repl)),
Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
+ tmp_repl = repl;
}
- result[result_len] = '\0';
- add_next_index_stringl(return_value, result,
result_len, 0);
+ if (Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl))
+ php_unify_string_types(tmp_str, tmp_repl
TSRMLS_CC);
+ php_adjust_limits(tmp_str, &f, &l);
+ result_len = php_do_substr_replace(&result, tmp_str,
tmp_repl, f, l TSRMLS_CC);
+
+ if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+ add_next_index_unicodel(return_value, (UChar
*)result, result_len, 0);
+ } else if (Z_TYPE_PP(tmp_str) == IS_BINARY) {
+ add_next_index_binaryl(return_value, (char
*)result, result_len, 0);
+ } else {
+ add_next_index_stringl(return_value, (char
*)result, result_len, 0);
+ }
zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str);
} /*while*/
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php