Author: leo
Date: Thu Nov 10 02:25:50 2005
New Revision: 9876

Modified:
   trunk/charset/unicode.c
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
unicode improvements - chopn

* fix chopn: bufused was computed incorrectly
* chopn is still broken for immutable (constant or external) strings
* test


Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Thu Nov 10 02:25:50 2005
@@ -151,6 +151,9 @@ u_strToLower(UChar *dest, int32_t destCa
                 &err);
         assert(U_SUCCESS(err));
     }
+    /* downgrade if possible */
+    if (dest_len == (int)src->strlen)
+        src->encoding = Parrot_ucs2_encoding_ptr;
 #else
     real_exception(interpreter, NULL, E_LibraryNotLoadedError,
             "no ICU lib loaded");

Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Thu Nov 10 02:25:50 2005
@@ -1281,7 +1281,13 @@ C<n> is negative, cuts the string after 
 STRING *
 string_chopn(Interp *interpreter, STRING *s, INTVAL n)
 {
-    UINTVAL new_length;
+    UINTVAL new_length, uchar_size;
+    String_iter iter;
+    /*
+     * FIXME constant or external strings can't be chopped in place
+     */
+    if (!s)
+        return NULL;
 
     if (n < 0) {
         new_length = -n;
@@ -1294,10 +1300,24 @@ string_chopn(Interp *interpreter, STRING
         else
             new_length = 0;
     }
-
-    s->strlen = new_length;
-    s->bufused = string_max_bytes(interpreter, s, new_length);
     s->hashval = 0;
+    if (!new_length || !s->strlen) {
+        s->bufused = s->strlen = 0;
+        return s;
+    }
+    uchar_size = s->bufused / s->strlen;
+    s->strlen = new_length;
+    if (s->encoding == Parrot_fixed_8_encoding_ptr) {
+        s->bufused = new_length;
+    }
+    else if (s->encoding == Parrot_ucs2_encoding_ptr) {
+        s->bufused = new_length * uchar_size;
+    }
+    else {
+        ENCODING_ITER_INIT(interpreter, s, &iter);
+        iter.set_position(interpreter, &iter, new_length);
+        s->bufused = iter.bytepos;
+    }
 
     return s;
 }

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Thu Nov 10 02:25:50 2005
@@ -16,7 +16,7 @@ Tests charset support.
 
 =cut
 
-use Parrot::Test tests => 34;
+use Parrot::Test tests => 35;
 use Test::More;
 
 output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -556,3 +556,18 @@ CODE
 84_214_84_83_67_72_
 OUTPUT
 
+output_is( <<'CODE', <<"OUTPUT", "chopn utf8");
+    set S0, iso-8859-1:"TTÖÖ"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    chopn S1, 2
+    print_item S1
+    length I0, S1
+    print_item I0
+    .include "stringinfo.pasm"
+    stringinfo I0, S1, .STRINGINFO_BUFUSED
+    print_item I0
+    print_newline
+CODE
+TT 2 2
+OUTPUT

Reply via email to