Author: leo
Date: Wed Nov  9 14:26:49 2005
New Revision: 9873

Modified:
   trunk/include/parrot/charset.h
   trunk/include/parrot/string_funcs.h
   trunk/ops/ops.num
   trunk/ops/string.ops
   trunk/src/charset.c
   trunk/src/encoding.c
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
encodings - opcodes

* create needed encoding opcodes
* fix a bug in encoding bootstrapping: 
    registering charsets needs encodings (and charsets) for charset names
    registering encodings needs charsets (and encodings) for encoding names
* add a test that uses trans_encoding instead of utf8 filter

More tests are very welcome. Thanks.


Modified: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h      (original)
+++ trunk/include/parrot/charset.h      Wed Nov  9 14:26:49 2005
@@ -150,7 +150,7 @@ struct _charset {
 #define CHARSET_COMPUTE_HASH(interp, source, seed) ((CHARSET *)source->charset)->compute_hash(interpreter, source, seed)
 #define CHARSET_GET_PREFERRED_ENCODING(interp, source) ((CHARSET *)source->charset)->preferred_encoding
 
-#define CHARSET_TO_ENCODING(interp, source, offset, count) ((ENCODING *)source->encoding)->to_encoding(interp, source, offset, count)
+#define CHARSET_TO_ENCODING(interp, source) ((ENCODING *)source->encoding)->to_encoding(interp, source)
 #define CHARSET_COPY_TO_ENCODING(interp, source) ((ENCODING *)source->encoding)->copy_to_encoding(interp, source)
 #define CHARSET_GET_CODEPOINT(interp, source, offset) ((ENCODING *)source->encoding)->get_codepoint(interp, source, offset)
 #define CHARSET_SET_CODEPOINT(interp, source, offset, codepoint) ((ENCODING *)source->encoding)->set_codepoint(interp, source, offset, codepoint)

Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h (original)
+++ trunk/include/parrot/string_funcs.h Wed Nov  9 14:26:49 2005
@@ -120,6 +120,8 @@ INTVAL Parrot_string_find_word_boundary(
 
 STRING* Parrot_string_trans_charset(Interp *, STRING *src,
         INTVAL charset_nr, STRING *dest);
+STRING* Parrot_string_trans_encoding(Interp *, STRING *src,
+        INTVAL encoding_nr, STRING *dest);
 
 CHARSET* string_rep_compatible (Interp *, STRING *a, const STRING *b,
         ENCODING**);

Modified: trunk/ops/ops.num
==============================================================================
--- trunk/ops/ops.num   (original)
+++ trunk/ops/ops.num   Wed Nov  9 14:26:49 2005
@@ -1227,3 +1227,15 @@ if_null_p_ic                   1196
 if_null_s_ic                   1197
 unless_null_p_ic               1198
 unless_null_s_ic               1199
+encoding_i_s                    1200
+encoding_i_sc                   1201
+encodingname_s_i                1202
+encodingname_s_ic               1203
+find_encoding_i_s               1204
+find_encoding_i_sc              1205
+trans_encoding_s_i              1206
+trans_encoding_s_ic             1207
+trans_encoding_s_s_i            1208
+trans_encoding_s_s_ic           1209
+trans_encoding_s_sc_i           1210
+trans_encoding_s_sc_ic          1211

Modified: trunk/ops/string.ops
==============================================================================
--- trunk/ops/string.ops        (original)
+++ trunk/ops/string.ops        Wed Nov  9 14:26:49 2005
@@ -583,6 +583,61 @@ op trans_charset(out STR, in STR, in INT
   goto NEXT();
 }
 
+=item B<encoding>(out INT, in STR)
+
+Return the encoding number of string $2.
+
+=item B<encodingname>(out STR, in INT)
+
+Return the name of encoding numbered $2.
+
+=item B<find_encoding>(out INT, in STR)
+
+Return the encoding number of the encoding named $2. If the encoding doesn't
+exist, throw an exception.
+
+=item B<trans_encoding>(inout STR, in INT)
+
+Change the string to have the specified encoding.
+
+=item B<trans_encoding>(out STR, in STR, in INT)
+
+Create a string $1 from $2 with the specified encoding.
+
+Both functions may throw an exception on information loss.
+
+=cut
+
+op encoding(out INT, in STR) :base_core {
+  $1 = Parrot_encoding_number_of_str(interpreter, $2);
+  goto NEXT();
+}
+
+op encodingname(out STR, in INT) :base_core {
+  $1 = string_copy(interpreter, Parrot_encoding_name(interpreter, $2));
+  goto NEXT();
+}
+
+op find_encoding(out INT, in STR) :base_core {
+  INTVAL n = Parrot_encoding_number(interpreter, $2);
+  if (n < 0)
+    real_exception(interpreter, NULL, 1,
+       "encoding '%Ss' not found", $2);
+  $1 = n;
+  goto NEXT();
+}
+
+op trans_encoding(inout STR, in INT) {
+  $1 = Parrot_string_trans_encoding(interpreter, $1, $2, NULL);
+  goto NEXT();
+}
+
+op trans_encoding(out STR, in STR, in INT) {
+  STRING *dest = new_string_header(interpreter, 0);
+  $1 = Parrot_string_trans_encoding(interpreter, $2, $3, dest);
+  goto NEXT();
+}
+
 =item B<is_cclass>(out INT, in INT, in STR, in INT)
 
 Set $1 to 1 if the codepoint of $3 at position $4 is in

Modified: trunk/src/charset.c
==============================================================================
--- trunk/src/charset.c (original)
+++ trunk/src/charset.c Wed Nov  9 14:26:49 2005
@@ -238,6 +238,8 @@ Parrot_register_charset(Interp *interpre
     return 0;
 }
 
+void parrot_init_encodings_2(Interp *interpreter);
+
 void
 Parrot_charsets_encodings_init(Interp *interpreter)
 {
@@ -253,6 +255,11 @@ Parrot_charsets_encodings_init(Interp *i
     Parrot_charset_iso_8859_1_init(interpreter);
     Parrot_charset_binary_init(interpreter);
     Parrot_charset_unicode_init(interpreter);
+
+    /*
+     * now encoding strings don't have a charset yet - set default
+     */
+    parrot_init_encodings_2(interpreter);
     /*
      * now install charset converters
      */

Modified: trunk/src/encoding.c
==============================================================================
--- trunk/src/encoding.c        (original)
+++ trunk/src/encoding.c        Wed Nov  9 14:26:49 2005
@@ -32,6 +32,18 @@ typedef struct {
 
 static All_encodings *all_encodings;
 
+void parrot_init_encodings_2(Interp *interpreter);
+void
+parrot_init_encodings_2(Interp *interpreter)
+{
+    int i, n;
+
+    n = all_encodings->n_encodings;
+    for (i = 0; i < n; ++i) {
+        all_encodings->enc[i].name->charset = Parrot_default_charset_ptr;
+    }
+}
+
 void
 parrot_deinit_encodings(Interp *interpreter)
 {

Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Wed Nov  9 14:26:49 2005
@@ -2679,6 +2679,39 @@ Parrot_string_trans_charset(Interp *inte
     return CHARSET_TO_CHARSET(interpreter, src, new_charset, dest);
 }
 
+STRING*
+Parrot_string_trans_encoding(Interp *interpreter, STRING *src,
+        INTVAL encoding_nr, STRING *dest)
+{
+    ENCODING *new_encoding;
+
+    if (!src)
+        return NULL;
+    new_encoding = Parrot_get_encoding(interpreter, encoding_nr);
+    if (!new_encoding)
+        real_exception(interpreter, NULL, INVALID_CHARTYPE,
+                "encoding #%d not found", (int) encoding_nr);
+    /*
+     * dest is an empty string header or NULL, if an inplace
+     * operation is desired
+     */
+    if (dest) {
+        if (new_encoding == src->encoding) {
+            Parrot_reuse_COW_reference(interpreter, src, dest);
+            dest->encoding = new_encoding;
+            return dest;
+        }
+        return new_encoding->copy_to_encoding(interpreter, src);
+    }
+    else {
+        if (new_encoding == src->encoding) {
+            return src;
+        }
+    }
+    new_encoding->to_encoding(interpreter, src);
+    return src;
+}
+
 /*
 
 =back

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Wed Nov  9 14:26:49 2005
@@ -16,7 +16,7 @@ Tests charset support.
 
 =cut
 
-use Parrot::Test tests => 32;
+use Parrot::Test tests => 33;
 use Test::More;
 
 output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -516,5 +516,19 @@ output_is( <<'CODE', <<"OUTPUT", "unicod
     print "\n"
     end
 CODE
+t\xc3\xb6tsch
+OUTPUT
+
+output_is( <<'CODE', <<"OUTPUT", "unicode downcase - transcharset");
+    set S0, iso-8859-1:"TÖTSCH"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    downcase S1
+    find_encoding I0, "utf8"
+    trans_encoding S2, S1, I0
+    print S2
+    print "\n"
+    end
+CODE
 t\xc3\xb6tsch
 OUTPUT

Reply via email to