Author: leo
Date: Thu Nov 10 08:12:19 2005
New Revision: 9884

Modified:
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
unicode improvements - string_replace

* string_replace should now handle all charsets and encodings
* 2 tests

More tests welcome


Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Thu Nov 10 08:12:19 2005
@@ -1152,17 +1152,6 @@ string_replace(Interp *interpreter, STRI
 
     true_offset = (UINTVAL)offset;
     true_length = (UINTVAL)length;
-
-    /* may have different reps..... */
-    if ( !(cs = string_rep_compatible(interpreter, src, rep, &enc))) {
-        internal_exception(UNIMPLEMENTED,
-                "Cross-type string replace (%s/%s) (%s/%s) unsupported",
-                ((ENCODING *)(src->encoding))->name,
-                ((CHARSET *)(src->charset))->name,
-                ((ENCODING *)(rep->encoding))->name,
-                ((CHARSET *)(rep->charset))->name);
-    }
-
     /* abs(-offset) may not be > strlen-1 */
     if (offset < 0) {
         true_offset = (UINTVAL)(src->strlen + offset);
@@ -1180,6 +1169,23 @@ string_replace(Interp *interpreter, STRI
         true_length = (UINTVAL)(src->strlen - true_offset);
     }
 
+    /* Save the substring that is replaced for the return value */
+    if (d != NULL) {
+        dest = CHARSET_GET_CODEPOINTS(interpreter, src,
+                true_offset, true_length);
+        *d = dest;
+    }
+
+    /* may have different reps..... */
+    if ( !(cs = string_rep_compatible(interpreter, src, rep, &enc))) {
+        Parrot_utf16_encoding_ptr->to_encoding(interpreter, src);
+        rep = Parrot_utf16_encoding_ptr->copy_to_encoding(interpreter, rep);
+    }
+    else {
+        src->charset = cs;
+        src->encoding = enc;
+    }
+
     /* get byte position of the part that will be replaced */
     ENCODING_ITER_INIT(interpreter, src, &iter);
     iter.set_position(interpreter, &iter, true_offset);
@@ -1192,26 +1198,7 @@ string_replace(Interp *interpreter, STRI
         internal_exception(SUBSTR_OUT_OF_STRING,
                 "replace: subend somehow is less than substart");
     }
-    /* Save the substring that is replaced for the return value */
-    if (d != NULL) {
-        UINTVAL length_bytes = string_max_bytes(interpreter, src, true_length);
-
-        dest = string_make_empty(interpreter, enum_stringrep_one, length_bytes);
-        dest->charset = src->charset;
-        dest->encoding = src->encoding;
-
-        mem_sys_memcopy(dest->strstart,
-                (char *)src->strstart + start_byte,
-                end_byte - start_byte);
-
-        dest->bufused = end_byte - start_byte;
-        dest->strlen = true_length;
-
-        *d = dest;
-    }
 
-    src->charset = cs;
-    src->encoding = enc;
     /* Now do the replacement */
 
     /*

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Thu Nov 10 08:12:19 2005
@@ -16,7 +16,7 @@ Tests charset support.
 
 =cut
 
-use Parrot::Test tests => 37;
+use Parrot::Test tests => 39;
 use Test::More;
 
 output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -618,3 +618,38 @@ CODE
 10 20
 T\xc3\xb6tsch Leo
 OUTPUT
+
+output_is( <<'CODE', <<"OUTPUT", "utf16 substr");
+    set S0, iso-8859-1:"Tötsch"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    find_encoding I0, "utf16"
+    trans_encoding S1, S1, I0
+    substr S2, S1, 1, 2
+    find_encoding I0, "utf8"
+    trans_encoding S2, S2, I0
+    print S2
+    print "\n"
+    end
+CODE
+\xc3\xb6t
+OUTPUT
+output_is( <<'CODE', <<"OUTPUT", "utf16 replace");
+    set S0, iso-8859-1:"Tötsch"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    find_encoding I0, "utf16"
+    trans_encoding S1, S1, I0
+    substr S2, S1, 1, 1, "oe"
+    find_encoding I0, "utf8"
+    trans_encoding S2, S2, I0
+    trans_encoding S1, S1, I0
+    print S2
+    print "\n"
+    print S1
+    print "\n"
+    end
+CODE
+\xc3\xb6
+Toetsch
+OUTPUT

Reply via email to