Author: leo
Date: Thu Nov 10 06:42:46 2005
New Revision: 9880

Modified:
   trunk/encodings/fixed_8.c
   trunk/encodings/ucs2.c
   trunk/encodings/utf16.c
   trunk/encodings/utf8.c
Log:
Unicode improvements - string_substr

* string_substr should now handle all charsets and encodings
* fix some uses of UChar outside of PARROT_HAS_ICU guards

Tests welcome


Modified: trunk/encodings/fixed_8.c
==============================================================================
--- trunk/encodings/fixed_8.c   (original)
+++ trunk/encodings/fixed_8.c   Thu Nov 10 06:42:46 2005
@@ -118,8 +118,6 @@ get_bytes_inplace(Interp *interpreter, S
        UINTVAL offset, UINTVAL count, STRING *return_string)
 {
     Parrot_reuse_COW_reference(interpreter, source_string, return_string);
-    return_string->encoding = source_string->encoding;
-    return_string->charset = source_string->charset;
 
     return_string->strstart = (char *)return_string->strstart + offset ;
     return_string->bufused = count;

Modified: trunk/encodings/ucs2.c
==============================================================================
--- trunk/encodings/ucs2.c      (original)
+++ trunk/encodings/ucs2.c      Thu Nov 10 06:42:46 2005
@@ -69,16 +69,27 @@ copy_to_encoding(Interp *interpreter, ST
 static UINTVAL
 get_codepoint(Interp *interpreter, const STRING *src, UINTVAL offset)
 {
+#if PARROT_HAS_ICU
     UChar *s = (UChar*) src->strstart;
     return s[offset];
+#else
+    real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+            "no ICU lib loaded");
+    return 0;
+#endif
 }
 
 static void
 set_codepoint(Interp *interpreter, STRING *src,
        UINTVAL offset, UINTVAL codepoint)
 {
+#if PARROT_HAS_ICU
     UChar *s = (UChar*) src->strstart;
     s[offset] = codepoint;
+#else
+    real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+            "no ICU lib loaded");
+#endif
 }
 
 static UINTVAL
@@ -99,18 +110,23 @@ static STRING *
 get_codepoints(Interp *interpreter, STRING *src,
        UINTVAL offset, UINTVAL count)
 {
-    String_iter iter;
-    UINTVAL start;
-    STRING *return_string = Parrot_make_COW_reference(interpreter,
-           src);
-    return_string->encoding = src->encoding;
-    return_string->charset = src->charset;
-    iter_init(interpreter, src, &iter);
-    iter.set_position(interpreter, &iter, offset);
-    start = iter.bytepos;
-    return_string->strstart = (char *)return_string->strstart + start ;
-    iter.set_position(interpreter, &iter, offset + count);
-    return_string->bufused = iter.bytepos - start;
+    STRING *return_string = Parrot_make_COW_reference(interpreter, src);
+#if PARROT_HAS_ICU
+    return_string->strstart = (char*)src->strstart + offset * sizeof(UChar);
+    return_string->bufused = count * sizeof(UChar);
+#else
+    {
+        String_iter iter;
+        UINTVAL start;
+
+        iter_init(interpreter, src, &iter);
+        iter.set_position(interpreter, &iter, offset);
+        start = iter.bytepos;
+        return_string->strstart = (char *)return_string->strstart + start;
+        iter.set_position(interpreter, &iter, offset + count);
+        return_string->bufused = iter.bytepos - start;
+    }
+#endif
     return_string->strlen = count;
     return_string->hashval = 0;
     return return_string;

Modified: trunk/encodings/utf16.c
==============================================================================
--- trunk/encodings/utf16.c     (original)
+++ trunk/encodings/utf16.c     Thu Nov 10 06:42:46 2005
@@ -194,8 +194,6 @@ get_codepoints(Interp *interpreter, STRI
     UINTVAL start;
     STRING *return_string = Parrot_make_COW_reference(interpreter,
            src);
-    return_string->encoding = src->encoding;
-    return_string->charset = src->charset;
     iter_init(interpreter, src, &iter);
     iter.set_position(interpreter, &iter, offset);
     start = iter.bytepos;
@@ -207,30 +205,29 @@ get_codepoints(Interp *interpreter, STRI
     return return_string;
 }
 
+
 static STRING *
-get_bytes(Interp *interpreter, STRING *src,
-       UINTVAL offset, UINTVAL count)
+get_codepoints_inplace(Interp *interpreter, STRING *src,
+       UINTVAL offset, UINTVAL count, STRING *return_string)
 {
-    STRING *return_string = Parrot_make_COW_reference(interpreter,
-           src);
-    return_string->encoding = src->encoding;    /* XXX */
-    return_string->charset = src->charset;
-
-    return_string->strstart = (char *)return_string->strstart + offset ;
-    return_string->bufused = count;
-
+    String_iter iter;
+    UINTVAL start;
+    Parrot_reuse_COW_reference(interpreter, src, return_string);
+    iter_init(interpreter, src, &iter);
+    iter.set_position(interpreter, &iter, offset);
+    start = iter.bytepos;
+    return_string->strstart = (char *)return_string->strstart + start ;
+    iter.set_position(interpreter, &iter, offset + count);
+    return_string->bufused = iter.bytepos - start;
     return_string->strlen = count;
     return_string->hashval = 0;
-
     return return_string;
 }
 
-
 static STRING *
-get_codepoints_inplace(Interp *interpreter, STRING *src,
-       UINTVAL offset, UINTVAL count, STRING *dest_string)
+get_bytes(Interp *interpreter, STRING *src,
+       UINTVAL offset, UINTVAL count)
 {
-
     UNIMPL;
     return NULL;
 }

Modified: trunk/encodings/utf8.c
==============================================================================
--- trunk/encodings/utf8.c      (original)
+++ trunk/encodings/utf8.c      Thu Nov 10 06:42:46 2005
@@ -415,8 +415,6 @@ get_codepoints(Interp *interpreter, STRI
     UINTVAL start;
     STRING *return_string = Parrot_make_COW_reference(interpreter,
            src);
-    return_string->encoding = src->encoding;
-    return_string->charset = src->charset;
     iter_init(interpreter, src, &iter);
     iter.set_position(interpreter, &iter, offset);
     start = iter.bytepos;
@@ -449,11 +447,20 @@ get_bytes(Interp *interpreter, STRING *s
 
 static STRING *
 get_codepoints_inplace(Interp *interpreter, STRING *src,
-       UINTVAL offset, UINTVAL count, STRING *dest_string)
+       UINTVAL offset, UINTVAL count, STRING *return_string)
 {
-
-    UNIMPL;
-    return NULL;
+    String_iter iter;
+    UINTVAL start;
+    Parrot_reuse_COW_reference(interpreter, src, return_string);
+    iter_init(interpreter, src, &iter);
+    iter.set_position(interpreter, &iter, offset);
+    start = iter.bytepos;
+    return_string->strstart = (char *)return_string->strstart + start ;
+    iter.set_position(interpreter, &iter, offset + count);
+    return_string->bufused = iter.bytepos - start;
+    return_string->strlen = count;
+    return_string->hashval = 0;
+    return return_string;
 }
 
 static STRING *

Reply via email to