Author: leo
Date: Sat Apr 16 04:06:03 2005
New Revision: 7851

Modified:
   trunk/build_tools/build_nativecall.pl
   trunk/classes/default.pmc
   trunk/imcc/docs/syntax.pod
   trunk/imcc/pbc.c
   trunk/imcc/symreg.c
   trunk/imcc/symreg.h
   trunk/include/parrot/charset.h
   trunk/include/parrot/string_funcs.h
   trunk/io/io_unix.c
   trunk/io/io_win32.c
   trunk/lib/Parrot/Pmc2c.pm
   trunk/ops/string.ops
   trunk/ops/sys.ops
   trunk/src/charset.c
   trunk/src/datatypes.c
   trunk/src/debug.c
   trunk/src/embed.c
   trunk/src/exceptions.c
   trunk/src/exec_start.c
   trunk/src/inter_create.c
   trunk/src/inter_misc.c
   trunk/src/jit_debug.c
   trunk/src/jit_debug_xcoff.c
   trunk/src/method_util.c
   trunk/src/misc.c
   trunk/src/spf_render.c
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
Strings. Finally. 17 - make ascii the default

* please read note on p6i WRT string change



Modified: trunk/build_tools/build_nativecall.pl
==============================================================================
--- trunk/build_tools/build_nativecall.pl       (original)
+++ trunk/build_tools/build_nativecall.pl       Sat Apr 16 04:06:03 2005
@@ -514,13 +514,13 @@
       see which signature has an unknown type. I am sure someone can come up
       with a neater way to do this.
      */
-    ns = string_make(interpreter, " is an unknown signature type", 29, 
"iso-8859-1", 0);
+    ns = string_make(interpreter, " is an unknown signature type", 29, 
"ascii", 0);
     message = string_concat(interpreter, signature, ns, 0);
 
 #if defined(CAN_BUILD_CALL_FRAMES)
-    ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is enabled, this 
should not happen", 58, "iso-8859-1", 0);
+    ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is enabled, this 
should not happen", 58, "ascii", 0);
 #else
-    ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is disabled, add 
the signature to src/call_list.txt", 75, "iso-8859-1", 0);
+    ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is disabled, add 
the signature to src/call_list.txt", 75, "ascii", 0);
 #endif
     message = string_concat(interpreter, message, ns, 0);
 

Modified: trunk/classes/default.pmc
==============================================================================
--- trunk/classes/default.pmc   (original)
+++ trunk/classes/default.pmc   Sat Apr 16 04:06:03 2005
@@ -80,7 +80,7 @@
      */
     static STRING *ro;
     if (!ro)
-        ro = string_make(interpreter, "_ro", 3, "iso-8859-1",
+        ro = string_make(interpreter, "_ro", 3, "ascii",
             PObj_constant_FLAG|PObj_external_FLAG);
     if (!string_compare(interpreter, key, ro)) {
 #else

Modified: trunk/imcc/docs/syntax.pod
==============================================================================
--- trunk/imcc/docs/syntax.pod  (original)
+++ trunk/imcc/docs/syntax.pod  Sat Apr 16 04:06:03 2005
@@ -83,16 +83,23 @@
 =item "string constants"
 
 Are delimited by B<">. A B<"> inside a string must be escaped by
-B<\">.
+B<\">.  Only 7-bit ASCII is accepted in string constants; to use
+characters outside that range, specify an encoding in the way below.
 
 =item charset:"string constant"
 
-Like above with a charset attached to the string. Valid charset are
-currently: C<ascii>, C<binary>, and the default C<iso-8859-1>.
+Like above with a character set attached to the string. Valid character
+sets are currently: C<ascii> (the default), C<binary>, C<unicode>
+(with UTF-8 as the default encoding), and C<iso-8859-1>.
+
+=item charset:encoding:"string constant"
+
+Like above with an extra encoding attached to the string.
+Currently unimplemented.
 
 =item 'char constant'
 
-Are delimited by B<'>.
+Are delimited by B<'>. They are taken to be C<ascii> encoded.
 
 =item numeric constants
 

Modified: trunk/imcc/pbc.c
==============================================================================
--- trunk/imcc/pbc.c    (original)
+++ trunk/imcc/pbc.c    Sat Apr 16 04:06:03 2005
@@ -548,10 +548,8 @@
     char *buf = r->name;
     STRING *s = NULL;
     char *charset = NULL;
-    /*
-     * VT_UNICODE should better be VT_CHARSET
-     */
-    if (r->type & VT_UNICODE) {
+
+    if (r->type & VT_ENCODED) {
         char *p;
         p = strchr(r->name, ':');
         assert(p);
@@ -566,7 +564,7 @@
     }
     else if (*buf == '\'') {   /* TODO handle python raw strings */
         buf++;
-        s = string_make(interpreter, buf, strlen(buf) - 1, "iso-8859-1",
+        s = string_make(interpreter, buf, strlen(buf) - 1, "ascii",
                 PObj_constant_FLAG);
     }
     else {

Modified: trunk/imcc/symreg.c
==============================================================================
--- trunk/imcc/symreg.c (original)
+++ trunk/imcc/symreg.c Sat Apr 16 04:06:03 2005
@@ -342,7 +342,7 @@
     if (t == 'U') {
         /* charset:"string" */
         r->set = 'S';
-        r->type |= VT_UNICODE;
+        r->type |= VT_ENCODED;
     }
     r->use_count++;
     return r;

Modified: trunk/imcc/symreg.h
==============================================================================
--- trunk/imcc/symreg.h (original)
+++ trunk/imcc/symreg.h Sat Apr 16 04:06:03 2005
@@ -24,7 +24,7 @@
     VT_START_ZERO  = PF_VT_START_ZERO  ,   /* .. y 0..start */
     VT_END_INF     = PF_VT_END_INF     ,   /* x..  start..inf */
     VT_SLICE_BITS  = PF_VT_SLICE_BITS,
-    VT_UNICODE  = 1 << 16       /* unicode string constant */
+    VT_ENCODED  = 1 << 16       /* unicode string constant */
 };
 
 /* this VARTYPE needs register allocation and such */

Modified: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h      (original)
+++ trunk/include/parrot/charset.h      Sat Apr 16 04:06:03 2005
@@ -28,7 +28,7 @@
 extern CHARSET *Parrot_ascii_charset_ptr;
 #endif
 
-#define PARROT_DEFAULT_CHARSET Parrot_iso_8859_1_charset_ptr
+#define PARROT_DEFAULT_CHARSET Parrot_ascii_charset_ptr
 #define PARROT_BINARY_CHARSET Parrot_binary_charset
 #define PARROT_UNICODE_CHARSET Parrot_unicode_charset_ptr
 

Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h (original)
+++ trunk/include/parrot/string_funcs.h Sat Apr 16 04:06:03 2005
@@ -118,6 +118,8 @@
 STRING* Parrot_string_trans_charset(Interp *, STRING *src,
         INTVAL charset_nr, STRING *dest);
 
+CHARSET* string_rep_compatible (Interp *, STRING *a, const STRING *b);
+
 #endif /* PARROT_IN_CORE */
 #endif /* PARROT_STRING_FUNCS_H_GUARD */
 

Modified: trunk/io/io_unix.c
==============================================================================
--- trunk/io/io_unix.c  (original)
+++ trunk/io/io_unix.c  Sat Apr 16 04:06:03 2005
@@ -662,7 +662,7 @@
     sa.sin_port = htons(port);
 
     return string_make(interpreter, &sa, sizeof(struct sockaddr),
-            "iso-8859-1", 0);
+            "binary", 0);
 }
 
 
@@ -934,7 +934,7 @@
         else {
             close(io->fd);
         }
-        *s = string_make(interpreter, buf, bytesread, "iso-8859-1", 0);
+        *s = string_make(interpreter, buf, bytesread, "binary", 0);
         if (!*s) {
             PANIC("PIO_recv: Failed to allocate string");
         }

Modified: trunk/io/io_win32.c
==============================================================================
--- trunk/io/io_win32.c (original)
+++ trunk/io/io_win32.c Sat Apr 16 04:06:03 2005
@@ -549,7 +549,7 @@
 
     sa.sin_port = htons(port);
 
-    return string_make(interpreter, &sa, sizeof(struct sockaddr), 
"iso-8859-1", 0);
+    return string_make(interpreter, &sa, sizeof(struct sockaddr), "binary", 0);
 }
 
 
@@ -691,7 +691,7 @@
         else {
             close((SOCKET)io->fd);
         }
-        *s = string_make(interpreter, buf, bytesread, "iso-8859-1", 0);
+        *s = string_make(interpreter, buf, bytesread, "binary", 0);
         if(!*s) {
             PANIC("PIO_recv: Failed to allocate string");
         }

Modified: trunk/lib/Parrot/Pmc2c.pm
==============================================================================
--- trunk/lib/Parrot/Pmc2c.pm   (original)
+++ trunk/lib/Parrot/Pmc2c.pm   Sat Apr 16 04:06:03 2005
@@ -854,13 +854,13 @@
         $cout .= <<"EOC";
             vt_clone->base_type = entry;
             vt_clone->whoami = string_make(interp,
-                "$classname", @{[length($classname)]}, "iso-8859-1",
+                "$classname", @{[length($classname)]}, "ascii",
                 PObj_constant_FLAG|PObj_external_FLAG);
             vt_clone->isa_str = string_make(interp,
-                "$isa", @{[length($isa)]}, "iso-8859-1",
+                "$isa", @{[length($isa)]}, "ascii",
                 PObj_constant_FLAG|PObj_external_FLAG);
             vt_clone->does_str = string_make(interp,
-                "$does", @{[length($does)]}, "iso-8859-1",
+                "$does", @{[length($does)]}, "ascii",
                 PObj_constant_FLAG|PObj_external_FLAG);
 EOC
     }

Modified: trunk/ops/string.ops
==============================================================================
--- trunk/ops/string.ops        (original)
+++ trunk/ops/string.ops        Sat Apr 16 04:06:03 2005
@@ -327,7 +327,7 @@
     char *c = (char *)&$3;
     STRING *s;
 
-    s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+    s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
     $1 = string_concat(interpreter, $1, s, 1);
 
     goto NEXT();
@@ -337,7 +337,7 @@
     char *c = (char *)&$3;
     STRING *s;
 
-    s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+    s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
     $1 = string_concat(interpreter, $1, s, 1);
 
     goto NEXT();
@@ -372,7 +372,7 @@
     char *t;
     int i;
 
-    s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+    s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
     ln = string_length(interpreter, $1);
     if (ln < $4 + $2)
     {

Modified: trunk/ops/sys.ops
==============================================================================
--- trunk/ops/sys.ops   (original)
+++ trunk/ops/sys.ops   Sat Apr 16 04:06:03 2005
@@ -54,13 +54,13 @@
 
 op err(out STR) {
   const char *tmp = strerror(errno);
-  $1 = string_make(interpreter, tmp, strlen(tmp), "iso-8859-1", 0);
+  $1 = string_make(interpreter, tmp, strlen(tmp), "ascii", 0);
   goto NEXT();
 }
 
 op err(out STR, in INT) {
   const char *tmp = strerror($2);
-  $1 = string_make(interpreter, tmp, strlen(tmp), "iso-8859-1", 0);
+  $1 = string_make(interpreter, tmp, strlen(tmp), "ascii", 0);
   goto NEXT();
 }
 

Modified: trunk/src/charset.c
==============================================================================
--- trunk/src/charset.c (original)
+++ trunk/src/charset.c Sat Apr 16 04:06:03 2005
@@ -221,9 +221,6 @@
     }
     if (!strcmp("iso-8859-1", charsetname)) {
         Parrot_iso_8859_1_charset_ptr = charset;
-        if (!Parrot_default_charset_ptr) {
-            Parrot_default_charset_ptr = charset;
-        }
         return register_charset(interpreter, charsetname, charset);
     }
     if (!strcmp("unicode", charsetname)) {
@@ -231,7 +228,9 @@
         return register_charset(interpreter, charsetname, charset);
     }
     if (!strcmp("ascii", charsetname)) {
-
+        if (!Parrot_default_charset_ptr) {
+            Parrot_default_charset_ptr = charset;
+        }
         Parrot_ascii_charset_ptr = charset;
         return register_charset(interpreter, charsetname, charset);
     }
@@ -243,14 +242,14 @@
 {
     /* the order is crucial here:
      * 1) encodings, default = fixed_8
-     * 2) charsets   default = iso-8859-1
+     * 2) charsets   default = ascii
      */
     Parrot_encoding_fixed_8_init(interpreter);
     Parrot_encoding_utf8_init(interpreter);
 
+    Parrot_charset_ascii_init(interpreter);
     Parrot_charset_iso_8859_1_init(interpreter);
     Parrot_charset_binary_init(interpreter);
-    Parrot_charset_ascii_init(interpreter);
     Parrot_charset_unicode_init(interpreter);
     /*
      * now install charset converters

Modified: trunk/src/datatypes.c
==============================================================================
--- trunk/src/datatypes.c       (original)
+++ trunk/src/datatypes.c       Sat Apr 16 04:06:03 2005
@@ -71,7 +71,7 @@
         s = "illegal";
     else
         s = data_types[type - enum_first_type].name;
-    return string_make(interpreter, s, strlen(s), "iso-8859-1", 
PObj_external_FLAG);
+    return string_make(interpreter, s, strlen(s), NULL, PObj_external_FLAG);
 }
 
 /*

Modified: trunk/src/debug.c
==============================================================================
--- trunk/src/debug.c   (original)
+++ trunk/src/debug.c   Sat Apr 16 04:06:03 2005
@@ -152,7 +152,7 @@
             str++;
     }
 
-    *strP = string_make(interpreter, string, str - string, "iso-8859-1", 0);
+    *strP = string_make(interpreter, string, str - string, NULL, 0);
 
     if (*str)
         str++;
@@ -685,7 +685,7 @@
             str[i - 1] = command[i];
         str[i - 1] = '\0';
         condition->value = string_make(interpreter,
-            str, i - 1, "iso-8859-1", PObj_external_FLAG);
+            str, i - 1, NULL, PObj_external_FLAG);
         condition->type |= PDB_cond_const;
     }
     else if (condition->type & PDB_cond_pmc) {
@@ -892,7 +892,7 @@
         c[i] = '\0';
         na(command);
 
-        arg = string_make(interpreter, c, i, "iso-8859-1", 0);
+        arg = string_make(interpreter, c, i, NULL, 0);
         VTABLE_push_string(interpreter, userargv, arg);
     }
 
@@ -2952,7 +2952,7 @@
            interpreter->ctx.current_pc - PMC_sub(sub)->address
        );
     }
-    
+
     sub = interpinfo_p(interpreter, CURRENT_CONT);
     while (!PMC_IS_NULL(sub) && sub->vtable->base_type == 
enum_class_Continuation) {
        str = VTABLE_get_string(interpreter, sub);

Modified: trunk/src/embed.c
==============================================================================
--- trunk/src/embed.c   (original)
+++ trunk/src/embed.c   Sat Apr 16 04:06:03 2005
@@ -229,7 +229,7 @@
             return NULL;
         }
         fs = interpreter->current_file = string_make(interpreter, fullname,
-                strlen(fullname), "iso-8859-1", 0);
+                strlen(fullname), NULL, 0);
         if (!Parrot_stat_info_intval(interpreter, fs, STAT_EXISTS)) {
             PIO_eprintf(interpreter, "Parrot VM: Can't stat %s, code %i.\n",
                     fullname, errno);
@@ -409,7 +409,7 @@
     for (i = 0; i < argc; i++) {
         /* Run through argv, adding everything to @ARGS. */
         STRING *arg = string_make(interpreter, argv[i], strlen(argv[i]),
-                                  "iso-8859-1", PObj_external_FLAG);
+                                  NULL, PObj_external_FLAG);
 
         if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG)) {
             PIO_eprintf(interpreter, "\t%vd: %s\n", i, argv[i]);

Modified: trunk/src/exceptions.c
==============================================================================
--- trunk/src/exceptions.c      (original)
+++ trunk/src/exceptions.c      Sat Apr 16 04:06:03 2005
@@ -683,7 +683,7 @@
     }
     else
         msg = string_make(interpreter, format, strlen(format),
-                "iso-8859-1", PObj_external_FLAG);
+                NULL, PObj_external_FLAG);
     /* string_from_cstring(interpreter, format, strlen(format)); */
     /*
      * FIXME classify errors

Modified: trunk/src/exec_start.c
==============================================================================
--- trunk/src/exec_start.c      (original)
+++ trunk/src/exec_start.c      Sat Apr 16 04:06:03 2005
@@ -68,7 +68,7 @@
     for (i = 0; i < argc; i++) {
         /* Run through argv, adding everything to @ARGS. */
         STRING *arg = string_make(interpreter, argv[i], strlen(argv[i]),
-                                  "iso-8859-1", PObj_external_FLAG);
+                                  NULL, PObj_external_FLAG);
 
         if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG)) {
             PIO_eprintf(interpreter, "\t%vd: %s\n", i, argv[i]);

Modified: trunk/src/inter_create.c
==============================================================================
--- trunk/src/inter_create.c    (original)
+++ trunk/src/inter_create.c    Sat Apr 16 04:06:03 2005
@@ -203,9 +203,9 @@
 
     /* Set up defaults for line/package/file */
     interpreter->current_file =
-        string_make(interpreter, "(unknown file)", 14, "iso-8859-1", 0);
+        string_make(interpreter, "(unknown file)", 14, NULL, 0);
     interpreter->ctx.current_package =
-        string_make(interpreter, "(unknown package)", 18, "iso-8859-1", 0);;
+        string_make(interpreter, "(unknown package)", 18, NULL, 0);
 
     SET_NULL_P(interpreter->code, struct PackFile *);
     SET_NULL_P(interpreter->profile, ProfData *);

Modified: trunk/src/inter_misc.c
==============================================================================
--- trunk/src/inter_misc.c      (original)
+++ trunk/src/inter_misc.c      Sat Apr 16 04:06:03 2005
@@ -70,11 +70,11 @@
     method = pmc_new(interpreter, enum_class_NCI);
     VTABLE_set_pointer_keyed_str(interpreter, method,
             string_make(interpreter, proto, strlen(proto),
-                "iso-8859-1", PObj_constant_FLAG|PObj_external_FLAG),
+                NULL, PObj_constant_FLAG|PObj_external_FLAG),
             func);
     VTABLE_set_pmc_keyed_str(interpreter, method_table,
             string_make(interpreter, name,
-                strlen(name), "iso-8859-1",
+                strlen(name), NULL,
                 PObj_constant_FLAG|PObj_external_FLAG),
             method);
 #else
@@ -82,12 +82,12 @@
     method = pmc_new(interpreter, enum_class_NCI);
     VTABLE_set_pointer_keyed_str(interpreter, method,
             string_make(interpreter, proto, strlen(proto),
-                "iso-8859-1", PObj_constant_FLAG|PObj_external_FLAG),
+                NULL, PObj_constant_FLAG|PObj_external_FLAG),
             func);
     Parrot_store_global(interpreter,
         Parrot_base_vtables[type]->whoami,
             string_make(interpreter, name,
-                strlen(name), "iso-8859-1",
+                strlen(name), NULL,
                 PObj_constant_FLAG|PObj_external_FLAG),
             method);
 

Modified: trunk/src/jit_debug.c
==============================================================================
--- trunk/src/jit_debug.c       (original)
+++ trunk/src/jit_debug.c       Sat Apr 16 04:06:03 2005
@@ -258,7 +258,7 @@
     STRING *ret;
     ret = string_copy(interpreter, file);
     ret = string_append(interpreter, ret,
-            string_make(interpreter, ext, strlen(ext), "iso-8859-1",
+            string_make(interpreter, ext, strlen(ext), NULL,
                 PObj_external_FLAG),
             0);
     return ret;
@@ -290,7 +290,7 @@
     if (interpreter->code->cur_cs->debugs) {
         char *ext;
         char *src = interpreter->code->cur_cs->debugs->filename;
-        pasmfile = string_make(interpreter, src, strlen(src), "iso-8859-1",
+        pasmfile = string_make(interpreter, src, strlen(src), NULL,
                 PObj_external_FLAG);
         file = string_copy(interpreter, pasmfile);
         /* chop pasm/imc */
@@ -302,7 +302,7 @@
             file = string_chopn(interpreter, file, 3);
         else if (!ext) /* EVAL_n */
             file = string_append(interpreter, file,
-                    string_make(interpreter, ".", 1, "iso-8859-1", 
PObj_external_FLAG),
+                    string_make(interpreter, ".", 1, NULL, PObj_external_FLAG),
                     0);
     }
     else {

Modified: trunk/src/jit_debug_xcoff.c
==============================================================================
--- trunk/src/jit_debug_xcoff.c (original)
+++ trunk/src/jit_debug_xcoff.c Sat Apr 16 04:06:03 2005
@@ -227,7 +227,7 @@
     STRING *ret;
     ret = string_copy(interpreter, file);
     ret = string_append(interpreter, ret,
-            string_make(interpreter, ext, strlen(ext), "iso-8859-1",
+            string_make(interpreter, ext, strlen(ext), NULL,
                 PObj_external_FLAG),
             0);
     return ret;
@@ -258,7 +258,7 @@
     if (interpreter->code->cur_cs->debugs) {
         char *ext;
         char *src = interpreter->code->cur_cs->debugs->filename;
-        pasmfile = string_make(interpreter, src, strlen(src), "iso-8859-1",
+        pasmfile = string_make(interpreter, src, strlen(src), NULL,
                 PObj_external_FLAG);
         file = string_copy(interpreter, pasmfile);
         /* chop pasm/imc */
@@ -270,7 +270,7 @@
             file = string_chopn(interpreter, file, 3);
         else if (!ext) /* EVAL_n */
             file = string_append(interpreter, file,
-                    string_make(interpreter, ".", 1, "iso-8859-1", 
PObj_external_FLAG),
+                    string_make(interpreter, ".", 1, NULL, PObj_external_FLAG),
                     0);
     }
     else {

Modified: trunk/src/method_util.c
==============================================================================
--- trunk/src/method_util.c     (original)
+++ trunk/src/method_util.c     Sat Apr 16 04:06:03 2005
@@ -171,7 +171,7 @@
     while (recp->name != NULL) {
         PMC *csub = Parrot_new_csub(interp, recp->sub);
         STRING *name = string_make(interp, recp->name, strlen(recp->name),
-                                   "iso-8859-1", 0);
+                                   NULL, 0);
         key_set_string(interp, k, name);
         VTABLE_set_pmc_keyed(interp, hash, k, csub);
         ++recp;

Modified: trunk/src/misc.c
==============================================================================
--- trunk/src/misc.c    (original)
+++ trunk/src/misc.c    Sat Apr 16 04:06:03 2005
@@ -87,7 +87,7 @@
     STRING *realpat, *ret;
 
     realpat = string_make(interpreter, pat, strlen(pat),
-                                  "iso-8859-1", PObj_external_FLAG);
+                                  NULL, PObj_external_FLAG);
 
     ret = Parrot_vsprintf_s(interpreter, realpat, args);
 

Modified: trunk/src/spf_render.c
==============================================================================
--- trunk/src/spf_render.c      (original)
+++ trunk/src/spf_render.c      Sat Apr 16 04:06:03 2005
@@ -66,7 +66,7 @@
     } while (num /= base);
     if (minus)
         *--p = '-';
-    return string_make(interpreter, p, tail - p, "iso-8859-1", 0);
+    return string_make(interpreter, p, tail - p, "ascii", 0);
 }
 
 /*
@@ -696,7 +696,7 @@
                             if (obj->getstring == pmc_core.getstring) {
                                 PMC *tmp = 
VTABLE_get_pmc_keyed_int(interpreter,
                                     ((PMC *)obj->data), (obj->index));
-                               
+
                                 obj->index++;
                                 string = (VTABLE_get_repr(interpreter, tmp));
 

Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Sat Apr 16 04:06:03 2005
@@ -402,6 +402,20 @@
     return s;
 }
 
+CHARSET *
+string_rep_compatible (Interp *interpreter, STRING *a, const STRING *b)
+{
+    if (a->encoding != b->encoding)     /* XXX utf8 ascii */
+        return NULL;
+    if (a->charset == b->charset)
+        return a->charset;
+    if (b->charset == Parrot_ascii_charset_ptr)
+        return a->charset;
+    if (a->charset == Parrot_ascii_charset_ptr)
+        return b->charset;
+    return NULL;
+}
+
 /*
 
 =item C<STRING *
@@ -420,6 +434,8 @@
 {
     UINTVAL a_capacity, b_len;
     UINTVAL total_length;
+    CHARSET *cs;
+
     UNUSED(Uflags);
 
     /* If B isn't real, we just bail */
@@ -456,8 +472,9 @@
 
     /* A is now ready to receive the contents of B */
 
-    /* if same rep, can memcopy */
-    if (a->encoding == b->encoding && a->charset == b->charset) {
+    /* if compatible rep, can memcopy */
+    if ( (cs = string_rep_compatible(interpreter, a, b))) {
+        a->charset = cs;
         /* Tack B on the end of A */
         mem_sys_memcopy((void *)((ptrcast_t)a->strstart + a->bufused),
                 b->strstart, b->bufused);
@@ -546,7 +563,7 @@
 {
     switch (representation) {
         case enum_stringrep_one:
-            return "iso-8859-1";
+            return "ascii";
             break;
         default:
             internal_exception(INVALID_STRING_REPRESENTATION,
@@ -593,7 +610,7 @@
     'ascii'
     'binary'
 
-If C<charset> is unspecified the default charset 'iso-8859-1' will be
+If C<charset> is unspecified the default charset 'ascii' will be
 used.
 
 The value of C<flags> is optionally one or more C<PObj_*> flags C<OR>-ed
@@ -609,11 +626,10 @@
 {
     ENCODING *encoding;
     CHARSET *charset;
+
     if (!charset_name) {
-        internal_exception(MISSING_ENCODING_NAME,
-            "string_make: no charset name specified");
+        charset_name = "ascii";
     }
-
     charset = Parrot_find_charset(interpreter, charset_name);
     if (!charset) {
         internal_exception(UNIMPLEMENTED,
@@ -648,7 +664,7 @@
     s->charset = charset;
 
     if (encoding == Parrot_fixed_8_encoding_ptr &&
-            charset == Parrot_iso_8859_1_charset_ptr) {
+            charset == Parrot_ascii_charset_ptr) {
         /*
          * fast path for external (constant) strings - don't allocate
          * and copy data
@@ -1108,13 +1124,19 @@
     UINTVAL true_offset;
     UINTVAL true_length;
     INTVAL diff;
+    CHARSET *cs;
 
     true_offset = (UINTVAL)offset;
     true_length = (UINTVAL)length;
 
     /* may have different reps..... */
-    if (src->encoding != rep->encoding || src->charset != rep->charset) {
-        internal_exception(UNIMPLEMENTED, "Can't handle mixed types yet");
+    if ( !(cs = string_rep_compatible(interpreter, src, rep))) {
+        internal_exception(UNIMPLEMENTED,
+                "Cross-type string replace (%s/%s) (%s/%s) unsupported",
+                ((ENCODING *)(src->encoding))->name,
+                ((CHARSET *)(src->charset))->name,
+                ((ENCODING *)(rep->encoding))->name,
+                ((CHARSET *)(rep->charset))->name);
     }
 
     /* abs(-offset) may not be > strlen-1 */
@@ -1141,6 +1163,7 @@
         UINTVAL length_bytes = string_max_bytes(interpreter, src, true_length);
 
         dest = string_make_empty(interpreter, enum_stringrep_one, true_length);
+        dest->charset = src->charset;
 
         mem_sys_memcopy(dest->strstart,
                 (char *)src->strstart
@@ -1153,6 +1176,7 @@
         *d = dest;
     }
 
+    src->charset = cs;
     /* Now do the replacement */
 
 
@@ -1401,6 +1425,7 @@
     STRING *res = NULL;
     size_t minlen = 0;
     parrot_string_representation_t maxrep = enum_stringrep_one;
+    CHARSET *cs;
 
     /* think about case of dest string is one of the operands */
     if (s1 && s2) {
@@ -1422,11 +1447,13 @@
         res->strlen = 0;
         return res;
     }
-    else {
-        if (s1->encoding != s2->encoding || s1->charset != s2->charset) {
-            internal_exception(UNIMPLEMENTED,
-                    "Can't do cross-type bitwwise and");
-        }
+    if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+        internal_exception(UNIMPLEMENTED,
+                "Cross-type string bitwise_and (%s/%s) (%s/%s) unsupported",
+                ((ENCODING *)(s1->encoding))->name,
+                ((CHARSET *)(s1->charset))->name,
+                ((ENCODING *)(s2->encoding))->name,
+                ((CHARSET *)(s2->charset))->name);
     }
 #if ! DISABLE_GC_DEBUG
     /* trigger GC for debug */
@@ -1435,6 +1462,7 @@
 #endif
 
     make_writable(interpreter, &res, minlen, enum_stringrep_one);
+    res->charset = cs;
 
     BITWISE_AND_STRINGS(Parrot_UInt1, Parrot_UInt1,
             Parrot_UInt1, s1, s2, res, minlen);
@@ -1509,6 +1537,7 @@
     STRING *res = NULL;
     size_t maxlen = 0;
     parrot_string_representation_t maxrep = enum_stringrep_one;
+    CHARSET *cs;
 
     maxlen = s1 ? s1->bufused: 0;
     if (s2 && s2->bufused > maxlen)
@@ -1527,6 +1556,20 @@
         return res;
     }
 
+    if (!s1)
+        cs = s2->charset;
+    else if (!s2)
+        cs = s1->charset;
+    else {
+        if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+            internal_exception(UNIMPLEMENTED,
+                    "Cross-type string bitwise_or (%s/%s) (%s/%s) unsupported",
+                    ((ENCODING *)(s1->encoding))->name,
+                    ((CHARSET *)(s1->charset))->name,
+                    ((ENCODING *)(s2->encoding))->name,
+                    ((CHARSET *)(s2->charset))->name);
+        }
+    }
 #if ! DISABLE_GC_DEBUG
     /* trigger GC for debug */
     if (interpreter && GC_DEBUG(interpreter))
@@ -1534,6 +1577,7 @@
 #endif
 
     make_writable(interpreter, &res, maxlen, enum_stringrep_one);
+    res->charset = cs;
 
     BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
             s1, s2, res, maxlen, |);
@@ -1567,6 +1611,7 @@
     STRING *res = NULL;
     size_t maxlen = 0;
     parrot_string_representation_t maxrep = enum_stringrep_one;
+    CHARSET *cs;
 
     maxlen = s1 ? s1->bufused: 0;
     if (s2 && s2->bufused > maxlen)
@@ -1585,6 +1630,20 @@
         return res;
     }
 
+    if (!s1)
+        cs = s2->charset;
+    else if (!s2)
+        cs = s1->charset;
+    else {
+        if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+            internal_exception(UNIMPLEMENTED,
+                    "Cross-type string bitwise_xor (%s/%s) (%s/%s) 
unsupported",
+                    ((ENCODING *)(s1->encoding))->name,
+                    ((CHARSET *)(s1->charset))->name,
+                    ((ENCODING *)(s2->encoding))->name,
+                    ((CHARSET *)(s2->charset))->name);
+        }
+    }
 #if ! DISABLE_GC_DEBUG
     /* trigger GC for debug */
     if (interpreter && GC_DEBUG(interpreter))
@@ -1592,6 +1651,7 @@
 #endif
 
     make_writable(interpreter, &res, maxlen, enum_stringrep_one);
+    res->charset = cs;
 
     BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
             s1, s2, res, maxlen, ^);
@@ -2289,7 +2349,7 @@
         --clength;
     flags = PObj_constant_FLAG;
     if (!charset)
-        charset = "iso-8859-1";
+        charset = "ascii";
     else
         flags |= PObj_private7_FLAG;  /* Pythonic unicode flag */
     result = string_make(interpreter, cstring, clength, charset, flags);

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Sat Apr 16 04:06:03 2005
@@ -34,7 +34,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "charset name" );
-    set S0, ascii:"ok 1\n"
+    set S0, "ok 1\n"
     charset I0, S0
     charsetname S1, I0
     print S1
@@ -66,7 +66,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "downcase" );
-    set S0, "AEIOU_���\n"
+    set S0, iso-8859-1:"AEIOU_���\n"
     downcase S1, S0
     print S1
     end
@@ -75,7 +75,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "upcase" );
-    set S0, "aeiou_����\n"
+    set S0, iso-8859-1:"aeiou_����\n"
     upcase S1, S0
     print S1
     end
@@ -84,7 +84,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "titlecase" );
-    set S0, "zAEIOU_���\n"
+    set S0, iso-8859-1:"zAEIOU_���\n"
     titlecase S1, S0
     print S1
     end
@@ -93,7 +93,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "is_whitespace");
-    set S0, "a\t\n \xa0"
+    set S0, iso-8859-1:"a\t\n \xa0"
     is_whitespace I0, S0, 0
     is_whitespace I1, S0, 1
     is_whitespace I2, S0, 2
@@ -248,7 +248,7 @@
 
 output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i");
     set S0, "abc"
-    find_charset I0, "ascii"
+    find_charset I0, "iso-8859-1"
     trans_charset S1, S0, I0
     print S1
     print "\n"
@@ -259,12 +259,12 @@
     end
 CODE
 abc
-ascii
+iso-8859-1
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i");
     set S1, "abc"
-    find_charset I0, "ascii"
+    find_charset I0, "iso-8859-1"
     trans_charset S1, I0
     print S1
     print "\n"
@@ -275,12 +275,12 @@
     end
 CODE
 abc
-ascii
+iso-8859-1
 OUTPUT
 
 
 output_like( <<'CODE', <<OUTPUT, "trans_charset_s_i - lossy");
-    set S1, "abc�"
+    set S1, iso-8859-1:"abc�"
     find_charset I0, "ascii"
     trans_charset S1, I0
     print "never\n"
@@ -306,7 +306,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to binary");
-    set S0, "abc"
+    set S0, iso-8859-1:"abc"
     find_charset I0, "binary"
     trans_charset S1, S0, I0
     print S1
@@ -322,7 +322,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i iso-8859-1 to binary");
-    set S1, "abc"
+    set S1, iso-8859-1:"abc"
     find_charset I0, "binary"
     trans_charset S1, I0
     print S1
@@ -402,7 +402,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to unicode");
-    set S0, "abc_�_"
+    set S0, iso-8859-1:"abc_�_"
     find_charset I0, "unicode"
     trans_charset S1, S0, I0
     print S1

Reply via email to