rolland         Thu Sep  8 01:05:38 2005 EDT

  Modified files:              
    /php-src/ext/standard       string.c 
  Log:
  - Unicode impl of substr_replace()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.478&r2=1.479&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.478 php-src/ext/standard/string.c:1.479
--- php-src/ext/standard/string.c:1.478 Wed Sep  7 03:00:03 2005
+++ php-src/ext/standard/string.c       Thu Sep  8 01:05:36 2005
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.478 2005/09/07 07:00:03 dmitry Exp $ */
+/* $Id: string.c,v 1.479 2005/09/08 05:05:36 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2553,6 +2553,118 @@
 /* }}} */
 
 
+/* {{{ php_unify_string_types
+ * Brings two string zvals to a common string type by converting exactly one
+ * of them. Returns immediately, touching neither, when p or q is NULL.
+ *
+ * Which side is converted:
+ *   - *p is IS_UNICODE: p is converted to binary when *q is IS_BINARY,
+ *     otherwise q is converted to unicode.
+ *   - *p is IS_BINARY:  q is converted to binary.
+ *   - any other *p:     p is converted to binary when *q is IS_BINARY,
+ *     otherwise q is passed through convert_to_string_ex().
+ *
+ * NOTE(review): in the last case an IS_UNICODE q is converted down to a
+ * native string rather than p being converted up - confirm this is intended.
+ */
+PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
+{
+       if (p == NULL || q == NULL) {
+               return;
+       }
+
+       if (Z_TYPE_PP(p) == IS_UNICODE) {
+               if (Z_TYPE_PP(q) == IS_BINARY) { /* binary wins over unicode */
+                       convert_to_binary_ex(p);
+               } else {
+                       convert_to_unicode_ex(q);
+               }
+       } else if (Z_TYPE_PP(p) == IS_BINARY) {
+               convert_to_binary_ex(q);
+       } else { /* p is neither unicode nor binary */
+               if (Z_TYPE_PP(q) == IS_BINARY) {
+                       convert_to_binary_ex(p);
+               } else {
+                       convert_to_string_ex(q);
+               }
+       }
+}
+/* }}} */
+
+/* {{{ php_adjust_limits
+ * Normalizes a (start, length) pair against the length of str and writes the
+ * adjusted values back through f and l.
+ *
+ * The string length is counted in code points for IS_UNICODE (one U16_FWD_1
+ * step per count) and taken from Z_STRLEN_PP() for every other type.
+ *
+ * str  string zval to measure; it is only read here
+ * f    in/out start offset; a negative value counts back from the end
+ * l    in/out length; a negative value means "stop that many units before
+ *      the end"
+ *
+ * On return 0 <= *f <= length, *l >= 0 and *f + *l <= length.
+ */
+PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
+{
+       int32_t i, str_codepts;
+
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               i = 0; str_codepts = 0;
+               while (i < Z_USTRLEN_PP(str)) { /* i is advanced by U16_FWD_1 */
+                       U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
+                       str_codepts++;
+               }
+       } else {
+               str_codepts = Z_STRLEN_PP(str);
+       }
+
+       /* If "from" position is negative, count start position from the end
+        * of the string */
+       if (*f < 0) {
+               *f = str_codepts + *f;
+               if (*f < 0) {
+                       *f = 0;
+               }
+       } else if (*f > str_codepts) { /* start past the end: clamp to the end */
+               *f = str_codepts;
+       }
+       /* If "length" position is negative, set it to the length
+        * needed to stop that many codepts/chars from the end of the string */
+       if (*l < 0) {
+               *l = str_codepts - *f + *l;
+               if (*l < 0) {
+                       *l = 0;
+               }
+       }
+       /* Both values are non-negative here; the sum is formed as unsigned.
+        * Trim the length so the range ends at the end of the string. */
+       if (((unsigned)(*f) + (unsigned)(*l)) > str_codepts) {
+               *l = str_codepts - *f;
+       }
+}
+/* }}} */
+
+/* {{{ php_do_substr_replace
+ * Builds a newly emalloc'ed, terminated copy of str in which the l units
+ * starting at offset f are replaced by repl, and stores it in *result.
+ *
+ * result  out: receives the new buffer (UChar * for IS_UNICODE, char *
+ *         otherwise). It is not freed here; ownership passes to the caller.
+ * str     subject string zval
+ * repl    replacement string zval, expected to have the same string type as
+ *         str. May be NULL, which is treated as an empty replacement.
+ * f, l    start offset and length of the range to replace. For IS_UNICODE
+ *         they are code point counts; for every other type they are byte
+ *         counts. Callers are expected to have clamped them to the string
+ *         (see php_adjust_limits).
+ *
+ * Returns the length of the result without the terminator: UChar code units
+ * for IS_UNICODE, bytes otherwise.
+ */
+PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC)
+{
+       void *buf;
+       int32_t buf_len;
+
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               UChar *src = Z_USTRVAL_PP(str);
+               int32_t src_len = Z_USTRLEN_PP(str);
+               int32_t repl_len = (repl != NULL) ? Z_USTRLEN_PP(repl) : 0;
+               int32_t head = 0, tail;
+               UChar *dst;
+
+               /* Translate the code point counts into UChar offsets. U16_FWD_N
+                * steps over surrogate pairs as one unit and never moves past
+                * src_len, so the offsets stay inside the string. */
+               U16_FWD_N(src, head, src_len, f);
+               tail = head;
+               U16_FWD_N(src, tail, src_len, l);
+
+               /* buf_len is a UChar (code unit) count here */
+               buf_len = head + repl_len + (src_len - tail);
+               dst = emalloc(UBYTES(buf_len + 1));
+
+               u_memcpy(dst, src, head);
+               if (repl_len > 0) {
+                       u_memcpy(dst + head, Z_USTRVAL_PP(repl), repl_len);
+               }
+               u_memcpy(dst + head + repl_len, src + tail, src_len - tail);
+               dst[buf_len] = 0;
+
+               buf = dst;
+       } else {
+               char *src = Z_STRVAL_PP(str);
+               int32_t src_len = Z_STRLEN_PP(str);
+               int32_t repl_len = (repl != NULL) ? Z_STRLEN_PP(repl) : 0;
+               char *dst;
+
+               /* buf_len is a byte count here */
+               buf_len = src_len - l + repl_len;
+               dst = emalloc(buf_len + 1);
+
+               memcpy(dst, src, f);
+               if (repl_len > 0) {
+                       memcpy(dst + f, Z_STRVAL_PP(repl), repl_len);
+               }
+               memcpy(dst + f + repl_len, src + f + l, src_len - f - l);
+               dst[buf_len] = '\0';
+
+               buf = dst;
+       }
+
+       *result = buf;
+       return buf_len;
+}
+/* }}} */
+
 /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed 
length])
    Replaces part of a string with another string */
 PHP_FUNCTION(substr_replace)
@@ -2561,10 +2673,10 @@
        zval **from;
        zval **len = NULL;
        zval **repl;
-       char *result;
-       int result_len;
-       int l = 0;
-       int f;
+       void *result;
+       int32_t result_len;
+       int32_t l = 0;
+       int32_t f;
        int argc = ZEND_NUM_ARGS();
 
        HashPosition pos_str, pos_from, pos_repl, pos_len;
@@ -2575,16 +2687,18 @@
                WRONG_PARAM_COUNT;
        }
        
-       if (Z_TYPE_PP(str) != IS_ARRAY) {
-               convert_to_string_ex(str);
+       if (Z_TYPE_PP(str) != IS_ARRAY && Z_TYPE_PP(str) != IS_UNICODE &&
+               Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) {
+               convert_to_text_ex(str);
        }
-       if (Z_TYPE_PP(repl) != IS_ARRAY) {
-               convert_to_string_ex(repl);
+       if (Z_TYPE_PP(repl) != IS_ARRAY && Z_TYPE_PP(repl) != IS_UNICODE &&
+               Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+               convert_to_text_ex(repl);
        }
        if (Z_TYPE_PP(from) != IS_ARRAY) {
                convert_to_long_ex(from);
+               f = Z_LVAL_PP(from);
        }
-
        if (argc > 3) {
                if (Z_TYPE_PP(len) != IS_ARRAY) {
                        convert_to_long_ex(len);
@@ -2592,80 +2706,59 @@
                }
        } else {
                if (Z_TYPE_PP(str) != IS_ARRAY) {
-                       l = Z_STRLEN_PP(str);
+                       if (Z_TYPE_PP(str) == IS_UNICODE) {
+                               l = Z_USTRLEN_PP(str);
+                       } else {
+                               l = Z_STRLEN_PP(str);
+                       }
                }
        }
 
-       if (Z_TYPE_PP(str) == IS_STRING) {
-               if (
-                       (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) 
-                       || 
-                       (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len))
-               ) {
+       if (Z_TYPE_PP(str) != IS_ARRAY) {
+               if ( (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) ||
+                        (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) ) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 
'len' should be of same type - numerical or array ");
-                       RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);  
        
+                       RETURN_ZVAL(*str, 1, 0);
                }
                if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) {
                        if (zend_hash_num_elements(Z_ARRVAL_PP(from)) != 
zend_hash_num_elements(Z_ARRVAL_PP(len))) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING, 
"'from' and 'len' should have the same number of elements");
-                               RETURN_STRINGL(Z_STRVAL_PP(str), 
Z_STRLEN_PP(str), 1);          
+                               RETURN_ZVAL(*str, 1, 0);
                        }
                }
        }
 
        
        if (Z_TYPE_PP(str) != IS_ARRAY) {
-               if (Z_TYPE_PP(from) != IS_ARRAY) {
-                       int repl_len = 0;
-
-                       f = Z_LVAL_PP(from);
-
-                       /* if "from" position is negative, count start position 
from the end
-                        * of the string
-                        */
-                       if (f < 0) {
-                               f = Z_STRLEN_PP(str) + f;
-                               if (f < 0) {
-                                       f = 0;
-                               }
-                       } else if (f > Z_STRLEN_PP(str)) {
-                               f = Z_STRLEN_PP(str);
-                       }
-                       /* if "length" position is negative, set it to the 
length
-                        * needed to stop that many chars from the end of the 
string
-                        */
-                       if (l < 0) {
-                               l = (Z_STRLEN_PP(str) - f) + l;
-                               if (l < 0) {
-                                       l = 0;
-                               }
-                       }
-
-                       if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) {
-                               l = Z_STRLEN_PP(str) - f;
-                       }
+               if (Z_TYPE_PP(from) != IS_ARRAY ) {
                        if (Z_TYPE_PP(repl) == IS_ARRAY) {
                                
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
                                if (SUCCESS == 
zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, 
&pos_repl)) {
-                                       convert_to_string_ex(tmp_repl);
-                                       repl_len = Z_STRLEN_PP(tmp_repl);
+                                       if (Z_TYPE_PP(repl) != IS_UNICODE && 
Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+                                               convert_to_text_ex(tmp_repl);
+                                       }
+                               } else {
+                                       tmp_repl = NULL;
                                }
                        } else {
-                               repl_len = Z_STRLEN_PP(repl);
+                               tmp_repl = repl;
                        }
-                       result_len = Z_STRLEN_PP(str) - l + repl_len;
-                       result = emalloc(result_len + 1);
 
-                       memcpy(result, Z_STRVAL_PP(str), f);
-                       if (repl_len) {
-                               memcpy((result + f), (Z_TYPE_PP(repl) == 
IS_ARRAY ? Z_STRVAL_PP(tmp_repl) : Z_STRVAL_PP(repl)), repl_len);
-                       }
-                       memcpy((result + f + repl_len), Z_STRVAL_PP(str) + f + 
l, Z_STRLEN_PP(str) - f - l);
-                       result[result_len] = '\0';
-                       RETURN_STRINGL(result, result_len, 0);
+                       if (Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl))
+                               php_unify_string_types(str, tmp_repl TSRMLS_CC);
+                       php_adjust_limits(str, &f, &l);
+                       result_len = php_do_substr_replace(&result, str, 
tmp_repl, f, l TSRMLS_CC);
+
+                       if (Z_TYPE_PP(str) == IS_UNICODE) {
+                               RETURN_UNICODEL((UChar *)result, result_len, 0);
+                       } else if (Z_TYPE_PP(str) == IS_BINARY) {
+                               RETURN_BINARYL((char *)result, result_len, 0);
+                       } else {
+                               RETURN_STRINGL((char *)result, result_len, 0);
+                       }
                } else {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, 
"Functionality of 'from' and 'len' as arrays is not implemented.");
-                       RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);  
+                       RETURN_ZVAL(*str, 1, 0);        
                }
        } else { /* str is array of strings */
                array_init(return_value);
@@ -2673,104 +2766,76 @@
                if (Z_TYPE_PP(from) == IS_ARRAY) {
                        zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from), 
&pos_from);
                }
-
                if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
                        zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len), 
&pos_len);
                }
-
                if (Z_TYPE_PP(repl) == IS_ARRAY) {
                        zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), 
&pos_repl);
                }
 
                zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str);
                while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void 
**) &tmp_str, &pos_str) == SUCCESS) {
-                       convert_to_string_ex(tmp_str);
+                       if (Z_TYPE_PP(tmp_str) != IS_UNICODE && 
Z_TYPE_PP(tmp_str) != IS_BINARY && Z_TYPE_PP(tmp_str) != IS_STRING) {
+                               convert_to_text_ex(tmp_str);
+                       }
 
                        if (Z_TYPE_PP(from) == IS_ARRAY) {
                                if (SUCCESS == 
zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from, 
&pos_from)) {
                                        convert_to_long_ex(tmp_from);
-
                                        f = Z_LVAL_PP(tmp_from);
-                                       if (f < 0) {
-                                               f = Z_STRLEN_PP(tmp_str) + f;
-                                               if (f < 0) {
-                                                       f = 0;
-                                               }
-                                       } else if (f > Z_STRLEN_PP(tmp_str)) {
-                                               f = Z_STRLEN_PP(tmp_str);
-                                       }
                                        
zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from);
                                } else {
                                        f = 0;
                                }
-                       } else {
-                               f = Z_LVAL_PP(from);
-                               if (f < 0) {
-                                       f = Z_STRLEN_PP(tmp_str) + f;
-                                       if (f < 0) {
-                                               f = 0;
-                                       }
-                               } else if (f > Z_STRLEN_PP(tmp_str)) {
-                                       f = Z_STRLEN_PP(tmp_str);
-                               }
                        }
 
-                       if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
+                       if (argc > 3 && (Z_TYPE_PP(len) == IS_ARRAY)) {
                                if (SUCCESS == 
zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) {
                                        convert_to_long_ex(tmp_len);
-
                                        l = Z_LVAL_PP(tmp_len);
                                        
zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len);
                                } else {
-                                       l = Z_STRLEN_PP(tmp_str);
+                                       if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+                                               l = Z_USTRLEN_PP(tmp_str);
+                                       } else {
+                                               l = Z_STRLEN_PP(tmp_str);
+                                       }
                                }
-                       } else if (argc > 3) { 
-                               l = Z_LVAL_PP(len);
+                       } else if (argc > 3) {
+                               /* 'l' parsed & set at top of funcn */
                        } else {
-                               l = Z_STRLEN_PP(tmp_str);
-                       }
-
-                       if (l < 0) {
-                               l = (Z_STRLEN_PP(tmp_str) - f) + l;
-                               if (l < 0) {
-                                       l = 0;
+                               if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+                                       l = Z_USTRLEN_PP(tmp_str);
+                               } else {
+                                       l = Z_STRLEN_PP(tmp_str);
                                }
                        }
 
-                       if (((unsigned) f + (unsigned) l) > 
Z_STRLEN_PP(tmp_str)) {
-                               l = Z_STRLEN_PP(tmp_str) - f;
-                       }
-
-                       result_len = Z_STRLEN_PP(tmp_str) - l;
-
                        if (Z_TYPE_PP(repl) == IS_ARRAY) {
                                if (SUCCESS == 
zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, 
&pos_repl)) {
-                                       convert_to_string_ex(tmp_repl);
-                                       result_len += Z_STRLEN_PP(tmp_repl);
+                                       if (Z_TYPE_PP(repl) != IS_UNICODE && 
Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
+                                               convert_to_text_ex(tmp_repl);
+                                       }
                                        
zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl);        
-                                       result = emalloc(result_len + 1);
-
-                                       memcpy(result, Z_STRVAL_PP(tmp_str), f);
-                                       memcpy((result + f), 
Z_STRVAL_PP(tmp_repl), Z_STRLEN_PP(tmp_repl));
-                                       memcpy((result + f + 
Z_STRLEN_PP(tmp_repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f 
- l);
                                } else {
-                                       result = emalloc(result_len + 1);
-       
-                                       memcpy(result, Z_STRVAL_PP(tmp_str), f);
-                                       memcpy((result + f), 
Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
+                                       tmp_repl = NULL;
                                }
                        } else {
-                               result_len += Z_STRLEN_PP(repl);
-
-                               result = emalloc(result_len + 1);
-
-                               memcpy(result, Z_STRVAL_PP(tmp_str), f);
-                               memcpy((result + f), Z_STRVAL_PP(repl), 
Z_STRLEN_PP(repl));
-                               memcpy((result + f + Z_STRLEN_PP(repl)), 
Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
+                               tmp_repl = repl;
                        }
 
-                       result[result_len] = '\0';
-                       add_next_index_stringl(return_value, result, 
result_len, 0);
+                       if (Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl))
+                               php_unify_string_types(tmp_str, tmp_repl 
TSRMLS_CC);
+                       php_adjust_limits(tmp_str, &f, &l);
+                       result_len = php_do_substr_replace(&result, tmp_str, 
tmp_repl, f, l TSRMLS_CC);
+
+                       if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
+                               add_next_index_unicodel(return_value, (UChar 
*)result, result_len, 0);
+                       } else if (Z_TYPE_PP(tmp_str) == IS_BINARY) {
+                               add_next_index_binaryl(return_value, (char 
*)result, result_len, 0);
+                       } else {
+                               add_next_index_stringl(return_value, (char 
*)result, result_len, 0);
+                       }
 
                        zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str);
                } /*while*/

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to