Author: leo
Date: Tue Nov 15 02:37:49 2005
New Revision: 9979

Modified:
   trunk/src/string.c
Log:
charsets and encodings - ascii fallback

* string operations with mixed ascii + utf8 strings fall back to
  ascii if the utf8 string contains only ascii characters


Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Tue Nov 15 02:37:49 2005
@@ -406,26 +406,52 @@ string_make_empty(Interp *interpreter,
     return s;
 }
 
+/*
+
+=item C<CHARSET *string_rep_compatible (Interp *, STRING *a, const STRING *b,
+        ENCODING **e)>
+
+Find the "lowest" possible charset and encoding for the given string. E.g.
+
+  ascii <op> utf8 => utf8
+                  => ascii, B<if> C<STRING *b> has ascii chars only.
+
+Returns NULL if no compatible string representation can be found.
+
+=cut
+
+*/
+
 CHARSET *
 string_rep_compatible (Interp *interpreter, STRING *a, const STRING *b,
         ENCODING **e)
 {
-    if (e)
-        *e = a->encoding;
+    /*
+     * a table could possibly simplify the logic
+     */
     if (a->encoding == Parrot_utf8_encoding_ptr &&
             b->charset == Parrot_ascii_charset_ptr) {
+        if (a->strlen == a->bufused) {
+            *e = Parrot_fixed_8_encoding_ptr;
+            return Parrot_ascii_charset_ptr;
+        }
+        *e = a->encoding;
         return a->charset;
     }
     if (b->encoding == Parrot_utf8_encoding_ptr &&
             a->charset == Parrot_ascii_charset_ptr) {
-        if (e)
-            *e = Parrot_utf8_encoding_ptr;
-        else
-            a->encoding = Parrot_utf8_encoding_ptr;
+        if (b->strlen == b->bufused) {
+            *e = Parrot_fixed_8_encoding_ptr;
+            return a->charset;
+        }
+        *e = Parrot_utf8_encoding_ptr;
         return b->charset;
     }
     if (a->encoding != b->encoding)
         return NULL;
+    if (a->encoding != Parrot_fixed_8_encoding_ptr)
+        return NULL;
+    *e = Parrot_fixed_8_encoding_ptr;
     if (a->charset == b->charset)
         return a->charset;
     if (b->charset == Parrot_ascii_charset_ptr)
@@ -458,6 +484,11 @@ string_append(Interp *interpreter,
     UINTVAL a_capacity, b_len;
     UINTVAL total_length;
     CHARSET *cs;
+    ENCODING *enc;
+
+    /*
+     * XXX should this be a CHARSET method?
+     */
 
     UNUSED(Uflags);
 
@@ -480,9 +511,11 @@ string_append(Interp *interpreter,
         return string_concat(interpreter, a, b, Uflags);
     }
 
-    cs = string_rep_compatible(interpreter, a, b, NULL);
-    if (cs != NULL)
+    cs = string_rep_compatible(interpreter, a, b, &enc);
+    if (cs != NULL) {
         a->charset = cs;
+        a->encoding = enc;
+    }
     else {
         /* upgrade to utf16 */
         Parrot_utf16_encoding_ptr->to_encoding(interpreter, a, NULL);
@@ -985,10 +1018,19 @@ string_concat(Interp *interpreter,
 {
     if (a != NULL && a->strlen != 0) {
         if (b != NULL && b->strlen != 0) {
-            STRING *result =
+            CHARSET *cs;
+            ENCODING *enc;
+            STRING *result;
+
+            cs = string_rep_compatible(interpreter, a, b, &enc);
+            if (!cs) {
+                cs = a->charset;
+                enc =a->encoding;
+            }
+            result =
                 string_make_direct(interpreter, NULL,
                         a->bufused + b->bufused,
-                        a->encoding, a->charset, 0);
+                        enc, cs, 0);
 
             string_append(interpreter, result, a, Uflags);
             string_append(interpreter, result, b, Uflags);
@@ -2404,7 +2446,7 @@ string_unescape_cstring(Interp * interpr
          *     then append the bytes w/o further processing to
          *     the string buffer
          *
-         * that is currently just fixed_8 encodings are correct    
+         * that is currently just fixed_8 encodings are correct
          */
         result = string_make_direct(interpreter, cstring, clength,
                 encoding, charset, flags);

Reply via email to