Author: leo
Date: Wed Nov  9 11:04:27 2005
New Revision: 9864

Modified:
   trunk/charset/unicode.c
   trunk/encodings/utf16.c
Log:
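Improve UTF-16 encoding: replace the hard-coded 2 with sizeof(UChar) and, in utf16.c, retry u_strFromUTF8 with a resized buffer if the first conversion fails.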

Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Wed Nov  9 11:04:27 2005
@@ -137,12 +137,13 @@ u_strToLower(UChar *dest, int32_t destCa
              UErrorCode *pErrorCode);
      */
     err = U_ZERO_ERROR;
-    result_len = u_strToLower(src->strstart, PObj_buflen(src) / 2,
+    result_len = u_strToLower(src->strstart, PObj_buflen(src) / sizeof(UChar),
             src->strstart, src->strlen,
             NULL,       /* locale = default */
             &err);
-    assert(!err);
-    src->bufused = result_len * 2;
+    /* TODO implement resizing */
+    assert(U_SUCCESS(err));
+    src->bufused = result_len * sizeof(UChar);
 #else
     real_exception(interpreter, NULL, E_LibraryNotLoadedError,
             "no ICU lib loaded");

Modified: trunk/encodings/utf16.c
==============================================================================
--- trunk/encodings/utf16.c     (original)
+++ trunk/encodings/utf16.c     Wed Nov  9 11:04:27 2005
@@ -49,7 +49,6 @@ to_encoding(Interp *interpreter, STRING 
     /*
      * TODO adapt string creation functions
      */
-    Parrot_reallocate_string(interpreter, src, 2 * src->strlen);
     src->charset  = Parrot_unicode_charset_ptr;
     src->encoding = Parrot_utf16_encoding_ptr;
     src_len = src->strlen;
@@ -66,11 +65,20 @@ to_encoding(Interp *interpreter, STRING 
 #if PARROT_HAS_ICU
     err = U_ZERO_ERROR;
     /* XXX these inplace operations are all shit (sorry) */
-    p = mem_sys_allocate(PObj_buflen(src));
-    u_strFromUTF8(p, PObj_buflen(src) / 2,
+    p = mem_sys_allocate(src_len * sizeof(UChar));
+    u_strFromUTF8(p, src_len,
             &dest_len, src->strstart, src->bufused, &err);
-    assert(!err);       /* TODO */
-    src->bufused = dest_len * 2;;
+    if (!U_SUCCESS(err)) {
+        /*
+         * have to resize - required len in UChars is in dest_len
+         */
+        p = mem_sys_realloc(p, dest_len * sizeof(UChar));
+        u_strFromUTF8(p, dest_len,
+                &dest_len, src->strstart, src->bufused, &err);
+        assert(U_SUCCESS(err));
+    }
+    src->bufused = dest_len * sizeof(UChar);
+    Parrot_reallocate_string(interpreter, src, src->bufused);
     memcpy(src->strstart, p, src->bufused);
     mem_sys_free(p);
 #else
@@ -87,6 +95,7 @@ copy_to_encoding(Interp *interpreter, ST
     UErrorCode err;
     int dest_len;
 #endif
+    int src_len;
 
     if (src->encoding == Parrot_utf16_encoding_ptr)
         return string_copy(interpreter, src);
@@ -95,17 +104,24 @@ copy_to_encoding(Interp *interpreter, ST
      * TODO adapt string creation functions
      */
     dest = new_string_header(interpreter, 0);
-    Parrot_allocate_string(interpreter, dest, 2 * src->strlen);
+    src_len = src->strlen;
+    Parrot_allocate_string(interpreter, dest, sizeof(UChar) * src_len);
+    dest->strlen   = src_len;
     dest->charset  = Parrot_unicode_charset_ptr;
     dest->encoding = Parrot_utf16_encoding_ptr;
-    dest->strlen   = src->strlen;
-    if (!src->strlen)
+    if (!src_len)
         return dest;
 #if PARROT_HAS_ICU
     err = U_ZERO_ERROR;
-    u_strFromUTF8(dest->strstart, dest->bufused,
+    u_strFromUTF8(dest->strstart, src_len,
             &dest_len, src->strstart, src->bufused, &err);
-    assert(!err);       /* TODO */
+    if (!U_SUCCESS(err)) {
+        Parrot_allocate_string(interpreter, dest, sizeof(UChar) * dest_len);
+        u_strFromUTF8(dest->strstart, dest_len,
+                &dest_len, src->strstart, src->bufused, &err);
+        assert(U_SUCCESS(err));
+    }
+    
 #else
     real_exception(interpreter, NULL, E_LibraryNotLoadedError,
             "no ICU lib loaded");
@@ -263,10 +279,13 @@ utf16_decode_and_advance(Interp *interpr
 {
     UChar *s = (UChar*) i->str->strstart;
     UINTVAL c, pos;
-    pos = i->bytepos / 2;
+    pos = i->bytepos / sizeof(UChar);
+    /* TODO either make sure that we don't go past end or use SAFE
+     *      iter versions
+     */
     U16_NEXT_UNSAFE(s, pos, c);
     i->charpos++;
-    i->bytepos = pos * 2;
+    i->bytepos = pos * sizeof(UChar);
     return c;
 }
 
@@ -275,10 +294,10 @@ utf16_encode_and_advance(Interp *interpr
 {
     UChar *s = (UChar*) i->str->strstart;
     UINTVAL pos;
-    pos = i->bytepos / 2;
+    pos = i->bytepos / sizeof(UChar);
     U16_APPEND_UNSAFE(s, pos, c);
     i->charpos++;
-    i->bytepos = pos * 2;
+    i->bytepos = pos * sizeof(UChar);
 }
 
 static void
@@ -289,7 +308,7 @@ utf16_set_position(Interp *interpreter, 
     pos = 0;
     U16_FWD_N_UNSAFE(s, pos, n);
     i->charpos = n;
-    i->bytepos = pos * 2;
+    i->bytepos = pos * sizeof(UChar);
 }
 
 #endif

Reply via email to