Author: leo
Date: Thu Nov 10 13:00:37 2005
New Revision: 9890

Modified:
   trunk/charset/ascii.c
   trunk/charset/binary.c
   trunk/charset/iso-8859-1.c
   trunk/charset/unicode.c
   trunk/encodings/utf16.c
   trunk/include/parrot/charset.h
   trunk/src/string.c
Log:
charsets - again

* removed 3 unneeded additional conversion functions;
  removed macros and vtable hooks
* simplify to_charset for all charsets




Modified: trunk/charset/ascii.c
==============================================================================
--- trunk/charset/ascii.c       (original)
+++ trunk/charset/ascii.c       Thu Nov 10 13:00:37 2005
@@ -80,38 +80,34 @@ ascii_get_graphemes_inplace(Interp *inte
             offset, count, dest_string);
 }
 
-
 static STRING *
-from_charset(Interp *interpreter, STRING *src, STRING *dest)
+to_ascii(Interp *interpreter, STRING *src, STRING *dest)
 {
-    UINTVAL offs, c;
     String_iter iter;
+    UINTVAL c, len, offs;
+    unsigned char *p;
 
+    len = src->strlen;
     if (dest) {
-        Parrot_reallocate_string(interpreter, dest, src->strlen);
-        dest->bufused = src->strlen;
-        dest->strlen  = src->strlen;
+        Parrot_reallocate_string(interpreter, dest, len);
+    }
+    else {
+        /* the string can't grow - replace inplace */
+        dest = src;
     }
+    p = dest->strstart;
     ENCODING_ITER_INIT(interpreter, src, &iter);
-    for (offs = 0; offs < src->strlen; ++offs) {
+    for (offs = 0; offs < len; ++offs) {
         c = iter.get_and_advance(interpreter, &iter);
-        if (c >= 0x80) {
-            EXCEPTION(LOSSY_CONVERSION, "lossy conversion to ascii");
-        }
-        if (dest)
-            ENCODING_SET_BYTE(interpreter, dest, offs, c);
-    }
-    if (dest)
-        return dest;
-    src->charset = Parrot_ascii_charset_ptr;
-    return src;
-}
-
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    internal_exception(UNIMPLEMENTED, "Can't do this yet");
-    return NULL;
+        if (c >= 128)
+            real_exception(interpreter, NULL, LOSSY_CONVERSION,
+                    "can't convert unicode string to ascii");
+        *p++ = (unsigned char)c;
+    }
+    dest->bufused = dest->strlen = len;
+    dest->charset = Parrot_ascii_charset_ptr;
+    dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interpreter, dest);
+    return dest;
 }
 
 static STRING *
@@ -131,19 +127,16 @@ to_unicode(Interp *interpreter, STRING *
 }
 
 static STRING *
-to_charset(Interp *interpreter, STRING *src,
-        CHARSET *new_charset, STRING *dest)
+to_charset(Interp *interpreter, STRING *src, STRING *dest)
 {
     charset_converter_t conversion_func;
 
     if ((conversion_func = Parrot_find_charset_converter(interpreter,
-                    src->charset, new_charset))) {
+                    src->charset, Parrot_ascii_charset_ptr))) {
          return conversion_func(interpreter, src, dest);
     }
     else {
-        STRING *res = to_unicode(interpreter, src, dest);
-        return new_charset->from_charset(interpreter, res, dest);
-
+        return to_ascii(interpreter, src, dest);
     }
 }
 
@@ -453,9 +446,6 @@ Parrot_charset_ascii_init(Interp *interp
         ascii_get_graphemes_inplace,
         set_graphemes,
         to_charset,
-        to_unicode,
-        from_charset,
-        from_unicode,
         compose,
         decompose,
         upcase,

Modified: trunk/charset/binary.c
==============================================================================
--- trunk/charset/binary.c      (original)
+++ trunk/charset/binary.c      Thu Nov 10 13:00:37 2005
@@ -34,33 +34,17 @@ set_graphemes(Interp *interpreter, STRIN
 }
 
 static STRING*
-to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING 
*dest)
+to_charset(Interp *interpreter, STRING *src, STRING *dest)
 {
+    charset_converter_t conversion_func;
+    if ((conversion_func = Parrot_find_charset_converter(interpreter,
+                    src->charset, Parrot_binary_charset_ptr))) {
+         return conversion_func(interpreter, src, dest);
+    }
     internal_exception(UNIMPLEMENTED, "to_charset for binary not implemented");
     return NULL;
 }
 
-static STRING*
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    internal_exception(UNIMPLEMENTED, "to_unicode for binary not implemented");
-    return NULL;
-}
-
-static STRING*
-from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    internal_exception(UNIMPLEMENTED, "Can't do this yet");
-    return NULL;
-}
-
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    internal_exception(UNIMPLEMENTED, "Can't do this yet");
-    return NULL;
-}
-
 /* A noop. can't compose binary */
 static void
 compose(Interp *interpreter, STRING *source_string)
@@ -174,9 +158,6 @@ Parrot_charset_binary_init(Interp *inter
         ascii_get_graphemes_inplace,
         set_graphemes,
         to_charset,
-        to_unicode,
-        from_charset,
-        from_unicode,
         compose,
         decompose,
         upcase,

Modified: trunk/charset/iso-8859-1.c
==============================================================================
--- trunk/charset/iso-8859-1.c  (original)
+++ trunk/charset/iso-8859-1.c  Thu Nov 10 13:00:37 2005
@@ -40,7 +40,7 @@ set_graphemes(Interp *interpreter, STRIN
 }
 
 static STRING *
-from_charset(Interp *interpreter, STRING *src, STRING *dest)
+to_latin1(Interp *interpreter, STRING *src, STRING *dest)
 {
     UINTVAL offs, c;
     String_iter iter;
@@ -49,27 +49,21 @@ from_charset(Interp *interpreter, STRING
         Parrot_reallocate_string(interpreter, dest, src->strlen);
         dest->bufused = src->strlen;
         dest->strlen  = src->strlen;
+        dest->charset = Parrot_iso_8859_1_charset_ptr;
+        dest->encoding = Parrot_fixed_8_encoding_ptr;
     }
+    else
+        internal_exception(UNIMPLEMENTED,
+                "to_charset inplace for latin1 not implemented");
     ENCODING_ITER_INIT(interpreter, src, &iter);
     for (offs = 0; offs < src->strlen; ++offs) {
         c = iter.get_and_advance(interpreter, &iter);
         if (c >= 0x100) {
             EXCEPTION(LOSSY_CONVERSION, "lossy conversion to ascii");
         }
-        if (dest)
-            ENCODING_SET_BYTE(interpreter, dest, offs, c);
+        ENCODING_SET_BYTE(interpreter, dest, offs, c);
     }
-    if (dest)
-        return dest;
-    src->charset = Parrot_ascii_charset_ptr;
-    return src;
-}
-
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    internal_exception(UNIMPLEMENTED, "Can't do this yet");
-    return NULL;
+    return dest;
 }
 
 static STRING *
@@ -106,19 +100,16 @@ to_unicode(Interp *interpreter, STRING *
 }
 
 static STRING *
-to_charset(Interp *interpreter, STRING *src,
-        CHARSET *new_charset, STRING *dest)
+to_charset(Interp *interpreter, STRING *src, STRING *dest)
 {
     charset_converter_t conversion_func;
 
     if ((conversion_func = Parrot_find_charset_converter(interpreter,
-                    src->charset, new_charset))) {
+                    src->charset, Parrot_iso_8859_1_charset_ptr))) {
          return conversion_func(interpreter, src, dest);
     }
     else {
-        STRING *res = to_unicode(interpreter, src, dest);
-        return new_charset->from_charset(interpreter, res, dest);
-
+        return to_latin1(interpreter, src, dest);
     }
 }
 
@@ -336,9 +327,6 @@ Parrot_charset_iso_8859_1_init(Interp *i
         ascii_get_graphemes_inplace,
         set_graphemes,
         to_charset,
-        to_unicode,
-        from_charset,
-        from_unicode,
         compose,
         decompose,
         upcase,

Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Thu Nov 10 13:00:37 2005
@@ -58,49 +58,44 @@ get_graphemes_inplace(Interp *interprete
 }
 
 static STRING*
-to_charset(Interp *interpreter, STRING *src,
-        CHARSET *new_charset, STRING *dest)
+to_charset(Interp *interpreter, STRING *src, STRING *dest)
 {
     charset_converter_t conversion_func;
+    String_iter iter;
+    UINTVAL c, len, offs;
 
     if ((conversion_func = Parrot_find_charset_converter(interpreter,
-                    src->charset, new_charset))) {
+                    src->charset, Parrot_unicode_charset_ptr))) {
          return conversion_func(interpreter, src, dest);
     }
-    else {
-        return new_charset->from_charset(interpreter, src, dest);
-
-    }
-}
-
-static STRING*
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    UNIMPL;
-    return NULL;
-}
-
-static STRING*
-from_charset(Interp *interpreter, STRING *src, STRING *dest)
-{
-    if (src->charset == Parrot_unicode_charset_ptr) {
-        if (!dest) {
-            /* inplace ok */
-            return src;
+    len = src->strlen;
+    if (dest) {
+        Parrot_reallocate_string(interpreter, dest, len);
+        dest->charset = Parrot_unicode_charset_ptr;
+        dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interpreter, dest);
+        ENCODING_ITER_INIT(interpreter, dest, &iter);
+        for (offs = 0; offs < src->strlen; ++offs) {
+            c = ENCODING_GET_CODEPOINT(interpreter, src, offs);
+            if (iter.bytepos >= PObj_buflen(dest) - 4) {
+                UINTVAL need = (UINTVAL)( (src->strlen - offs) * 1.5 );
+                if (need < 16)
+                    need = 16;
+                Parrot_reallocate_string(interpreter, dest,
+                        PObj_buflen(dest) + need);
+            }
+            iter.set_and_advance(interpreter, &iter, c);
         }
-        Parrot_reuse_COW_reference(interpreter, src, dest);
+        dest->bufused = iter.bytepos;
+        dest->strlen  = iter.charpos;
         return dest;
     }
-    UNIMPL;
+    else {
+        internal_exception(UNIMPLEMENTED,
+                "to_charset inplace for unicode not implemented");
+    }
     return NULL;
 }
 
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
-    UNIMPL;
-    return NULL;
-}
 
 static void
 compose(Interp *interpreter, STRING *source_string)
@@ -434,9 +429,6 @@ Parrot_charset_unicode_init(Interp *inte
         get_graphemes_inplace,
         set_graphemes,
         to_charset,
-        to_unicode,
-        from_charset,
-        from_unicode,
         compose,
         decompose,
         upcase,

Modified: trunk/encodings/utf16.c
==============================================================================
--- trunk/encodings/utf16.c     (original)
+++ trunk/encodings/utf16.c     Thu Nov 10 13:00:37 2005
@@ -204,7 +204,7 @@ set_byte(Interp *interpreter, const STRI
        internal_exception(0, "set_byte past the end of the buffer");
     }
     contents = src->strstart;
-    contents[offset] = byte;
+    contents[offset] = (unsigned char)byte;
 }
 
 static STRING *

Modified: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h      (original)
+++ trunk/include/parrot/charset.h      Thu Nov 10 13:00:37 2005
@@ -38,9 +38,6 @@ typedef STRING *(*charset_get_graphemes_
 typedef void (*charset_set_graphemes_t)(Interp *, STRING *source_string, 
UINTVAL offset, UINTVAL replace_count, STRING *insert_string);
 
 typedef STRING * (*charset_to_charset_t)(Interp *, STRING *source_string,
-        CHARSET *new_charset, STRING *dest);
-typedef STRING * (*charset_to_unicode_t)(Interp *, STRING *src, STRING *dest);
-typedef STRING * (*charset_from_charset_t)(Interp *, STRING *source_string,
         STRING *dest);
 typedef STRING * (*charset_from_unicode_t)(Interp *, STRING *source_string,
         STRING *dest);
@@ -104,9 +101,6 @@ struct _charset {
     charset_get_graphemes_inplace_t get_graphemes_inplace;
     charset_set_graphemes_t set_graphemes;
     charset_to_charset_t to_charset;
-    charset_to_unicode_t to_unicode;
-    charset_from_charset_t from_charset;
-    charset_from_unicode_t from_unicode;
     charset_compose_t compose;
     charset_decompose_t decompose;
     charset_upcase_t upcase;
@@ -130,7 +124,6 @@ struct _charset {
 #define CHARSET_GET_GRAPEMES(interp, source, offset, count) ((CHARSET 
*)source->charset)->get_graphemes(interpreter, source, offset, count)
 #define CHARSET_GET_GRAPHEMES_INPLACE(interp, source, dest, offset, count) 
((CHARSET *)source->charset)->get_graphemes(interpreter, source, dest, offset, 
count)
 #define CHARSET_SET_GRAPHEMES(interp, source, offset, replace_count, insert) 
((CHARSET *)source->charset)->set_graphemes(interpreter, source, offset, 
replace_count, insert)
-#define CHARSET_TO_CHARSET(interp, source, new_charset, dest) ((CHARSET 
*)source->charset)->to_charset(interpreter, source, new_charset, dest)
 #define CHARSET_TO_UNICODE(interp, source, dest) ((CHARSET 
*)source->charset)->to_unicode(interpreter, source, dest)
 #define CHARSET_COMPOSE(interp, source) ((CHARSET 
*)source->charset)->compose(interpreter, source)
 #define CHARSET_DECOMPOSE(interp, source) ((CHARSET 
*)source->charset)->decompose(interpreter, source)

Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Thu Nov 10 13:00:37 2005
@@ -2653,6 +2653,19 @@ Parrot_string_find_not_cclass(Interp *in
     return CHARSET_FIND_NOT_CCLASS(interpreter, flags, s, offset, count);
 }
 
+/*
+
+=item C< STRING*
+Parrot_string_trans_charset(Interp *interpreter, STRING *src,
+        INTVAL charset_nr, STRING *dest)>
+
+If C<dest> == NULL, convert C<src> to the given charset in place; else
+store the converted copy of C<src> in C<dest> and return C<dest>.
+
+=cut
+
+*/
+
 STRING*
 Parrot_string_trans_charset(Interp *interpreter, STRING *src,
         INTVAL charset_nr, STRING *dest)
@@ -2685,7 +2698,7 @@ Parrot_string_trans_charset(Interp *inte
             return src;
         }
     }
-    return CHARSET_TO_CHARSET(interpreter, src, new_charset, dest);
+    return new_charset->to_charset(interpreter, src, dest);
 }
 
 STRING*

Reply via email to