cvsuser 05/02/28 05:35:47
Modified: include/parrot charset.h
ops ops.num string.ops
src charset.c string.c
t/op string_cs.t
Log:
Strings. Finally. 2 - some charset opcodes
* opcodes and interface functions for:
charset, charsetname, find_charset
* refine charset registration
Revision Changes Path
1.7 +6 -1 parrot/include/parrot/charset.h
Index: charset.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/charset.h,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- charset.h 28 Feb 2005 10:41:19 -0000 1.6
+++ charset.h 28 Feb 2005 13:35:43 -0000 1.7
@@ -1,7 +1,7 @@
/* charset.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: charset.h,v 1.6 2005/02/28 10:41:19 leo Exp $
+ * $Id: charset.h,v 1.7 2005/02/28 13:35:43 leo Exp $
* Overview:
* This is the header for the 8-bit fixed-width encoding
* Data Structure and Algorithms:
@@ -80,6 +80,11 @@
typedef INTVAL (*charset_converter_t)(Interp *, CHARSET *lhs, CHARSET *rhs);
charset_converter_t Parrot_find_charset_converter(Interp *, CHARSET *lhs,
CHARSET *rhs);
+void Parrot_deinit_charsets(Interp *);
+INTVAL Parrot_charset_number(Interp *, STRING *charsetname);
+STRING* Parrot_charset_name(Interp *, INTVAL);
+INTVAL Parrot_charset_number_of_str(Interp *, STRING *src);
+
struct _charset {
const char *name;
charset_get_graphemes_t get_graphemes;
1.57 +6 -0 parrot/ops/ops.num
Index: ops.num
===================================================================
RCS file: /cvs/public/parrot/ops/ops.num,v
retrieving revision 1.56
retrieving revision 1.57
diff -u -r1.56 -r1.57
--- ops.num 14 Feb 2005 10:57:23 -0000 1.56
+++ ops.num 28 Feb 2005 13:35:45 -0000 1.57
@@ -1372,3 +1372,9 @@
listen_i_p_i 1342
listen_i_p_ic 1343
accept_p_p 1344
+charset_i_s 1345
+charset_i_sc 1346
+charsetname_s_i 1347
+charsetname_s_ic 1348
+find_charset_i_s 1349
+find_charset_i_sc 1350
1.32 +34 -0 parrot/ops/string.ops
Index: string.ops
===================================================================
RCS file: /cvs/public/parrot/ops/string.ops,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -r1.31 -r1.32
--- string.ops 5 Jan 2005 17:03:03 -0000 1.31
+++ string.ops 28 Feb 2005 13:35:45 -0000 1.32
@@ -621,6 +621,40 @@
goto NEXT();
}
+=item B<charset>(out INT, in STR)
+
+Return the charset number of string $2.
+
+=item B<charsetname>(out STR, in INT)
+
+Return the name of charset numbered $2.
+
+=item B<find_charset>(out INT, in STR)
+
+Return the charset number of the charset named $2. If the charset doesn't
+exit, throw an exception.
+
+=cut
+
+op charset(out INT, in STR) :base_core {
+ $1 = Parrot_charset_number_of_str(interpreter, $2);
+ goto NEXT();
+}
+
+op charsetname(out STR, in INT) :base_core {
+ $1 = string_copy(interpreter, Parrot_charset_name(interpreter, $2));
+ goto NEXT();
+}
+
+op find_charset(out INT, in STR) :base_core {
+ INTVAL n = Parrot_charset_number(interpreter, $2);
+ if (n < 0)
+ real_exception(interpreter, NULL, 1,
+ "charset '%Ss' not found", $2);
+ $1 = n;
+ goto NEXT();
+}
+
=back
=head1 COPYRIGHT
1.6 +102 -6 parrot/src/charset.c
Index: charset.c
===================================================================
RCS file: /cvs/public/parrot/src/charset.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- charset.c 28 Feb 2005 10:41:20 -0000 1.5
+++ charset.c 28 Feb 2005 13:35:46 -0000 1.6
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: charset.c,v 1.5 2005/02/28 10:41:20 leo Exp $
+$Id: charset.c,v 1.6 2005/02/28 13:35:46 leo Exp $
=head1 NAME
@@ -21,13 +21,34 @@
CHARSET *Parrot_unicode_charset_ptr;
CHARSET *Parrot_ascii_charset_ptr;
+/*
+ * all registered charsets are collected in one global structure
+ */
+
+typedef struct {
+ CHARSET *charset;
+ STRING *name;
+} One_charset;
+
+typedef struct {
+ int n_charsets;
+ One_charset *set;
+} All_charsets;
+
+static All_charsets *all_charsets;
+
+
CHARSET *
Parrot_new_charset(Interp *interpreter)
{
-
return mem_sys_allocate(sizeof(CHARSET));
}
+void
+Parrot_deinit_charsets(Interp *interpreter)
+{
+}
+
CHARSET *
Parrot_find_charset(Interp *interpreter, const char *charsetname)
{
@@ -50,28 +71,103 @@
return NULL;
}
+/*
+
+=item C<INTVAL Parrot_charset_number(Interp *, STRING *charsetname)>
+
+Return the number of the charset or -1 if not found.
+
+=item C<INTVAL Parrot_charset_number_of_str(Interp *, const STRING *src)>
+
+Return the number of the charset of the given string or -1 if not found.
+
+*/
+
+INTVAL
+Parrot_charset_number(Interp *interpreter, STRING *charsetname)
+{
+ int i, n;
+
+ n = all_charsets->n_charsets;
+ for (i = 0; i < n; ++i) {
+ if (!string_equal(interpreter, all_charsets->set[i].name,
charsetname))
+ return i;
+ }
+ return -1;
+}
+
+INTVAL
+Parrot_charset_number_of_str(Interp *interpreter, STRING *src)
+{
+ int i, n;
+
+ n = all_charsets->n_charsets;
+ for (i = 0; i < n; ++i) {
+ if (src->charset == all_charsets->set[i].charset)
+ return i;
+ }
+ return -1;
+}
+
+STRING*
+Parrot_charset_name(Interp *interpreter, INTVAL number_of_charset)
+{
+ if (number_of_charset >= all_charsets->n_charsets)
+ return NULL;
+ return all_charsets->set[number_of_charset].name;
+}
+
+static INTVAL
+register_charset(Interp *interpreter, const char *charsetname,
+ CHARSET *charset)
+{
+ int i, n;
+
+ n = all_charsets->n_charsets;
+ for (i = 0; i < n; ++i) {
+ if (!strcmp(all_charsets->set[i].charset->name, charsetname))
+ return 0;
+ }
+ if (!n)
+ all_charsets->set = mem_sys_allocate(sizeof(One_charset));
+ else
+ all_charsets->set = mem_sys_realloc(all_charsets->set, (n + 1) *
+ sizeof(One_charset));
+ all_charsets->n_charsets++;
+ all_charsets->set[n].charset = charset;
+ all_charsets->set[n].name = string_from_cstring(interpreter,
+ charsetname, 0);
+
+ return 1;
+}
+
INTVAL
Parrot_register_charset(Interp *interpreter, const char *charsetname,
CHARSET *charset)
{
+ if (!all_charsets) {
+ all_charsets = mem_sys_allocate(sizeof(All_charsets));
+ all_charsets->n_charsets = 0;
+ all_charsets->set = NULL;
+ }
if (!strcmp("binary", charsetname)) {
Parrot_binary_charset_ptr = charset;
- return 1;
+ return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("iso-8859-1", charsetname)) {
Parrot_iso_8859_1_charset_ptr = charset;
if (!Parrot_default_charset_ptr) {
Parrot_default_charset_ptr = charset;
}
- return 1;
+ return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("unicode", charsetname)) {
Parrot_unicode_charset_ptr = charset;
- return 1;
+ return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("ascii", charsetname)) {
Parrot_ascii_charset_ptr = charset;
- return 1;
+ return register_charset(interpreter, charsetname, charset);
}
return 0;
}
1.235 +9 -3 parrot/src/string.c
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.234
retrieving revision 1.235
diff -u -r1.234 -r1.235
--- string.c 28 Feb 2005 10:41:20 -0000 1.234
+++ string.c 28 Feb 2005 13:35:46 -0000 1.235
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: string.c,v 1.234 2005/02/28 10:41:20 leo Exp $
+$Id: string.c,v 1.235 2005/02/28 13:35:46 leo Exp $
=head1 NAME
@@ -288,11 +288,16 @@
} __ptr_u;
if (!interpreter->parent_interpreter) {
- /* Load in the basic encodings and charsets */
+ /* Load in the basic encodings and charsets
+ *
+ * the order is crucial here:
+ * 1) default encoding = fixed_8
+ * 2) default charset = iso-8859-1
+ */
Parrot_encoding_fixed_8_init(interpreter);
+ Parrot_charset_iso_8859_1_init(interpreter);
Parrot_charset_binary_init(interpreter);
Parrot_charset_ascii_init(interpreter);
- Parrot_charset_iso_8859_1_init(interpreter);
/* DEFAULT_ICU_DATA_DIR is configured at build time, or it may be
set through the $PARROT_ICU_DATA_DIR environment variable. Need
@@ -354,6 +359,7 @@
{
mem_sys_free(interpreter->const_cstring_table);
interpreter->const_cstring_table = NULL;
+ Parrot_deinit_charsets(interpreter);
}
/*
1.2 +33 -2 parrot/t/op/string_cs.t
Index: string_cs.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string_cs.t,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- string_cs.t 28 Feb 2005 10:41:15 -0000 1.1
+++ string_cs.t 28 Feb 2005 13:35:47 -0000 1.2
@@ -1,6 +1,6 @@
#! perl -w
# Copyright: 2001-2004 The Perl Foundation. All Rights Reserved.
-# $Id: string_cs.t,v 1.1 2005/02/28 10:41:15 leo Exp $
+# $Id: string_cs.t,v 1.2 2005/02/28 13:35:47 leo Exp $
=head1 NAME
@@ -16,7 +16,7 @@
=cut
-use Parrot::Test tests => 1;
+use Parrot::Test tests => 4;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -33,3 +33,34 @@
ok 3
OUTPUT
+output_is( <<'CODE', <<OUTPUT, "charset name" );
+ set S0, ascii:"ok 1\n"
+ charset I0, S0
+ charsetname S1, I0
+ print S1
+ print "\n"
+ end
+CODE
+ascii
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "find_charset" );
+ find_charset I0, "iso-8859-1"
+ print "ok 1\n"
+ find_charset I0, "ascii"
+ print "ok 2\n"
+ find_charset I0, "binary"
+ print "ok 3\n"
+ end
+CODE
+ok 1
+ok 2
+ok 3
+OUTPUT
+
+output_like( <<'CODE', <<OUTPUT, "find_charset - not existing" );
+ find_charset I0, "no_such"
+ end
+CODE
+/charset 'no_such' not found/
+OUTPUT