rolland Thu Sep 8 01:05:38 2005 EDT Modified files: /php-src/ext/standard string.c Log: - Unicode impl of substr_replace()
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.478&r2=1.479&ty=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.478 php-src/ext/standard/string.c:1.479 --- php-src/ext/standard/string.c:1.478 Wed Sep 7 03:00:03 2005 +++ php-src/ext/standard/string.c Thu Sep 8 01:05:36 2005 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: string.c,v 1.478 2005/09/07 07:00:03 dmitry Exp $ */ +/* $Id: string.c,v 1.479 2005/09/08 05:05:36 rolland Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -2553,6 +2553,118 @@ /* }}} */ +/* {{{ php_unify_string_types + */ +PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC) +{ + if (p == NULL || q == NULL) { + return; + } + + if (Z_TYPE_PP(p) == IS_UNICODE) { + if (Z_TYPE_PP(q) == IS_BINARY) { + convert_to_binary_ex(p); + } else { + convert_to_unicode_ex(q); + } + } else if (Z_TYPE_PP(p) == IS_BINARY) { + convert_to_binary_ex(q); + } else { + if (Z_TYPE_PP(q) == IS_BINARY) { + convert_to_binary_ex(p); + } else { + convert_to_string_ex(q); + } + } +} +/* }}} */ + +/* {{{ php_adjust_limits + */ +PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l) +{ + int32_t i, str_codepts; + + if (Z_TYPE_PP(str) == IS_UNICODE) { + i = 0; str_codepts = 0; + while (i < Z_USTRLEN_PP(str)) { + U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str)); + str_codepts++; + } + } else { + str_codepts = Z_STRLEN_PP(str); + } + + /* If "from" position is negative, count start position from the end + * of the string */ + if (*f < 0) { + *f = str_codepts + *f; + if (*f < 0) { + *f = 0; + } + } else if (*f > str_codepts) { + *f = str_codepts; + } + /* If "length" position is negative, set it to the length + * needed to stop that many codepts/chars from the end of the string */ + if (*l < 0) { + *l = str_codepts - *f + *l; + if (*l < 0) { + *l = 0; + } + } + if (((unsigned)(*f) + (unsigned)(*l)) > str_codepts) { + *l = str_codepts - *f; + 
} +} +/* }}} */ + +/* {{{ php_do_substr_replace + */ +PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC) +{ + void *buf; + int32_t buf_len, idx; + UChar ch; + + if (Z_TYPE_PP(str) == IS_UNICODE) { + buf = emalloc(UBYTES(Z_USTRLEN_PP(str) -l + Z_USTRLEN_PP(repl) + 1)); + + /* buf_len is codept count here */ + buf_len = 0; idx = 0; + while (f-- > 0) { + U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch); + buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + buf_len); + } + if (repl != NULL) { + u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), Z_USTRLEN_PP(repl)); + buf_len += Z_USTRLEN_PP(repl); + } + U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l); + u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, Z_USTRLEN_PP(str) - idx); + buf_len += (Z_USTRLEN_PP(str) - idx); + + *((UChar *)buf + buf_len) = 0; + buf = erealloc(buf, UBYTES(buf_len + 1)); + } else { + /* buf_len is char count here */ + buf_len = Z_STRLEN_PP(str) - l + Z_STRLEN_PP(repl); + buf = emalloc(buf_len + 1); + + memcpy(buf, Z_STRVAL_PP(str), f); + if (repl != NULL ) { + memcpy((char *)buf + f, Z_STRVAL_PP(repl), Z_STRLEN_PP(repl)); + } + memcpy((char *)buf + f + Z_STRLEN_PP(repl), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l); + + *((char *)buf + buf_len) = '\0'; + } + + *result = buf; + return buf_len; +} +/* }}} */ + /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length]) Replaces part of a string with another string */ PHP_FUNCTION(substr_replace) @@ -2561,10 +2673,10 @@ zval **from; zval **len = NULL; zval **repl; - char *result; - int result_len; - int l = 0; - int f; + void *result; + int32_t result_len; + int32_t l = 0; + int32_t f; int argc = ZEND_NUM_ARGS(); HashPosition pos_str, pos_from, pos_repl, pos_len; @@ -2575,16 +2687,18 @@ WRONG_PARAM_COUNT; } - if (Z_TYPE_PP(str) != IS_ARRAY) { - convert_to_string_ex(str); + if (Z_TYPE_PP(str) != IS_ARRAY && Z_TYPE_PP(str) != 
IS_UNICODE && + Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) { + convert_to_text_ex(str); } - if (Z_TYPE_PP(repl) != IS_ARRAY) { - convert_to_string_ex(repl); + if (Z_TYPE_PP(repl) != IS_ARRAY && Z_TYPE_PP(repl) != IS_UNICODE && + Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) { + convert_to_text_ex(repl); } if (Z_TYPE_PP(from) != IS_ARRAY) { convert_to_long_ex(from); + f = Z_LVAL_PP(from); } - if (argc > 3) { if (Z_TYPE_PP(len) != IS_ARRAY) { convert_to_long_ex(len); @@ -2592,80 +2706,59 @@ } } else { if (Z_TYPE_PP(str) != IS_ARRAY) { - l = Z_STRLEN_PP(str); + if (Z_TYPE_PP(str) == IS_UNICODE) { + l = Z_USTRLEN_PP(str); + } else { + l = Z_STRLEN_PP(str); + } } } - if (Z_TYPE_PP(str) == IS_STRING) { - if ( - (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) - || - (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) - ) { + if (Z_TYPE_PP(str) != IS_ARRAY) { + if ( (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) || + (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) ) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should be of same type - numerical or array "); - RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + RETURN_ZVAL(*str, 1, 0); } if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) { if (zend_hash_num_elements(Z_ARRVAL_PP(from)) != zend_hash_num_elements(Z_ARRVAL_PP(len))) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should have the same number of elements"); - RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + RETURN_ZVAL(*str, 1, 0); } } } if (Z_TYPE_PP(str) != IS_ARRAY) { - if (Z_TYPE_PP(from) != IS_ARRAY) { - int repl_len = 0; - - f = Z_LVAL_PP(from); - - /* if "from" position is negative, count start position from the end - * of the string - */ - if (f < 0) { - f = Z_STRLEN_PP(str) + f; - if (f < 0) { - f = 0; - } - } else if (f > Z_STRLEN_PP(str)) { - f = Z_STRLEN_PP(str); - } - /* if "length" position is negative, set it to the length - * needed to stop that many chars from the end of the string 
- */ - if (l < 0) { - l = (Z_STRLEN_PP(str) - f) + l; - if (l < 0) { - l = 0; - } - } - - if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) { - l = Z_STRLEN_PP(str) - f; - } + if (Z_TYPE_PP(from) != IS_ARRAY ) { if (Z_TYPE_PP(repl) == IS_ARRAY) { zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl); if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) { - convert_to_string_ex(tmp_repl); - repl_len = Z_STRLEN_PP(tmp_repl); + if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) { + convert_to_text_ex(tmp_repl); + } + } else { + tmp_repl = NULL; } } else { - repl_len = Z_STRLEN_PP(repl); + tmp_repl = repl; } - result_len = Z_STRLEN_PP(str) - l + repl_len; - result = emalloc(result_len + 1); - memcpy(result, Z_STRVAL_PP(str), f); - if (repl_len) { - memcpy((result + f), (Z_TYPE_PP(repl) == IS_ARRAY ? Z_STRVAL_PP(tmp_repl) : Z_STRVAL_PP(repl)), repl_len); - } - memcpy((result + f + repl_len), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l); - result[result_len] = '\0'; - RETURN_STRINGL(result, result_len, 0); + if (Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl)) + php_unify_string_types(str, tmp_repl TSRMLS_CC); + php_adjust_limits(str, &f, &l); + result_len = php_do_substr_replace(&result, str, tmp_repl, f, l TSRMLS_CC); + + if (Z_TYPE_PP(str) == IS_UNICODE) { + RETURN_UNICODEL((UChar *)result, result_len, 0); + } else if (Z_TYPE_PP(str) == IS_BINARY) { + RETURN_BINARYL((char *)result, result_len, 0); + } else { + RETURN_STRINGL((char *)result, result_len, 0); + } } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Functionality of 'from' and 'len' as arrays is not implemented."); - RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + RETURN_ZVAL(*str, 1, 0); } } else { /* str is array of strings */ array_init(return_value); @@ -2673,104 +2766,76 @@ if (Z_TYPE_PP(from) == IS_ARRAY) { zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from), &pos_from); } - if (argc 
> 3 && Z_TYPE_PP(len) == IS_ARRAY) { zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len), &pos_len); } - if (Z_TYPE_PP(repl) == IS_ARRAY) { zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl); } zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str); while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void **) &tmp_str, &pos_str) == SUCCESS) { - convert_to_string_ex(tmp_str); + if (Z_TYPE_PP(tmp_str) != IS_UNICODE && Z_TYPE_PP(tmp_str) != IS_BINARY && Z_TYPE_PP(tmp_str) != IS_STRING) { + convert_to_text_ex(tmp_str); + } if (Z_TYPE_PP(from) == IS_ARRAY) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from, &pos_from)) { convert_to_long_ex(tmp_from); - f = Z_LVAL_PP(tmp_from); - if (f < 0) { - f = Z_STRLEN_PP(tmp_str) + f; - if (f < 0) { - f = 0; - } - } else if (f > Z_STRLEN_PP(tmp_str)) { - f = Z_STRLEN_PP(tmp_str); - } zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from); } else { f = 0; } - } else { - f = Z_LVAL_PP(from); - if (f < 0) { - f = Z_STRLEN_PP(tmp_str) + f; - if (f < 0) { - f = 0; - } - } else if (f > Z_STRLEN_PP(tmp_str)) { - f = Z_STRLEN_PP(tmp_str); - } } - if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) { + if (argc > 3 && (Z_TYPE_PP(len) == IS_ARRAY)) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) { convert_to_long_ex(tmp_len); - l = Z_LVAL_PP(tmp_len); zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len); } else { - l = Z_STRLEN_PP(tmp_str); + if (Z_TYPE_PP(tmp_str) == IS_UNICODE) { + l = Z_USTRLEN_PP(tmp_str); + } else { + l = Z_STRLEN_PP(tmp_str); + } } - } else if (argc > 3) { - l = Z_LVAL_PP(len); + } else if (argc > 3) { + /* 'l' parsed & set at top of funcn */ } else { - l = Z_STRLEN_PP(tmp_str); - } - - if (l < 0) { - l = (Z_STRLEN_PP(tmp_str) - f) + l; - if (l < 0) { - l = 0; + if (Z_TYPE_PP(tmp_str) == IS_UNICODE) { + l = Z_USTRLEN_PP(tmp_str); + } else { + l = Z_STRLEN_PP(tmp_str); } } - if (((unsigned) f + (unsigned) l) > 
Z_STRLEN_PP(tmp_str)) { - l = Z_STRLEN_PP(tmp_str) - f; - } - - result_len = Z_STRLEN_PP(tmp_str) - l; - if (Z_TYPE_PP(repl) == IS_ARRAY) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) { - convert_to_string_ex(tmp_repl); - result_len += Z_STRLEN_PP(tmp_repl); + if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) { + convert_to_text_ex(tmp_repl); + } zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl); - result = emalloc(result_len + 1); - - memcpy(result, Z_STRVAL_PP(tmp_str), f); - memcpy((result + f), Z_STRVAL_PP(tmp_repl), Z_STRLEN_PP(tmp_repl)); - memcpy((result + f + Z_STRLEN_PP(tmp_repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l); } else { - result = emalloc(result_len + 1); - - memcpy(result, Z_STRVAL_PP(tmp_str), f); - memcpy((result + f), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l); + tmp_repl = NULL; } } else { - result_len += Z_STRLEN_PP(repl); - - result = emalloc(result_len + 1); - - memcpy(result, Z_STRVAL_PP(tmp_str), f); - memcpy((result + f), Z_STRVAL_PP(repl), Z_STRLEN_PP(repl)); - memcpy((result + f + Z_STRLEN_PP(repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l); + tmp_repl = repl; } - result[result_len] = '\0'; - add_next_index_stringl(return_value, result, result_len, 0); + if (Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl)) + php_unify_string_types(tmp_str, tmp_repl TSRMLS_CC); + php_adjust_limits(tmp_str, &f, &l); + result_len = php_do_substr_replace(&result, tmp_str, tmp_repl, f, l TSRMLS_CC); + + if (Z_TYPE_PP(tmp_str) == IS_UNICODE) { + add_next_index_unicodel(return_value, (UChar *)result, result_len, 0); + } else if (Z_TYPE_PP(tmp_str) == IS_BINARY) { + add_next_index_binaryl(return_value, (char *)result, result_len, 0); + } else { + add_next_index_stringl(return_value, (char *)result, result_len, 0); + } zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str); } /*while*/
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php