cvsuser 05/03/01 07:41:31
Modified: charset ascii.c ascii.h iso-8859-1.c
src charset.c
t/op string_cs.t
Log:
Strings. Finally. 9 - more charset converters
Revision Changes Path
1.14 +60 -15 parrot/charset/ascii.c
Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- ascii.c 1 Mar 2005 14:19:45 -0000 1.13
+++ ascii.c 1 Mar 2005 15:41:25 -0000 1.14
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.13 2005/03/01 14:19:45 leo Exp $
+$Id: ascii.c,v 1.14 2005/03/01 15:41:25 leo Exp $
=head1 NAME
@@ -95,33 +95,43 @@
offset, count, dest_string);
}
+
static STRING *
-to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
+from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "to_charset for ascii not implemented");
+ internal_exception(UNIMPLEMENTED, "Can't do this yet");
return NULL;
}
-
static STRING *
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
+from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "to_unicode for ascii not implemented");
+ internal_exception(UNIMPLEMENTED, "Can't do this yet");
return NULL;
}
-static STRING *
-from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
+STRING *
+ascii_to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "Can't do this yet");
+ internal_exception(UNIMPLEMENTED,
+ "to_unicode for iso-8859-1 not implemented");
return NULL;
}
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
+STRING *
+ascii_to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "Can't do this yet");
- return NULL;
+ charset_converter_t conversion_func;
+
+ if ((conversion_func = Parrot_find_charset_converter(interpreter,
+ src->charset, new_charset))) {
+ return conversion_func(interpreter, src, dest);
+ }
+ else {
+ STRING *res = ascii_to_unicode(interpreter, src, dest);
+ return new_charset->from_charset(interpreter, res, dest);
+
+ }
}
/* A noop. can't compose ascii */
@@ -506,8 +516,8 @@
ascii_get_graphemes,
ascii_get_graphemes_inplace,
set_graphemes,
- to_charset,
- to_unicode,
+ ascii_to_charset,
+ ascii_to_unicode,
from_charset,
from_unicode,
compose,
@@ -555,6 +565,41 @@
return return_set;
}
+STRING *
+charset_cvt_ascii_to_binary(Interp *interpreter, STRING *src, STRING *dest)
+{
+ UINTVAL offs, c;
+ if (dest) {
+ Parrot_reallocate_string(interpreter, dest, src->strlen);
+ dest->bufused = src->bufused;
+ dest->strlen = src->strlen;
+ for (offs = 0; offs < src->strlen; ++offs) {
+ c = ENCODING_GET_BYTE(interpreter, src, offs);
+ ENCODING_SET_BYTE(interpreter, dest, offs, c);
+ }
+ return dest;
+ }
+ src->charset = Parrot_binary_charset_ptr;
+ return src;
+}
+
+STRING *
+charset_cvt_ascii_to_iso_8859_1(Interp *interpreter, STRING *src, STRING *dest)
+{
+ UINTVAL offs, c;
+ if (dest) {
+ Parrot_reallocate_string(interpreter, dest, src->strlen);
+ dest->bufused = src->bufused;
+ dest->strlen = src->strlen;
+ for (offs = 0; offs < src->strlen; ++offs) {
+ c = ENCODING_GET_BYTE(interpreter, src, offs);
+ ENCODING_SET_BYTE(interpreter, dest, offs, c);
+ }
+ return dest;
+ }
+ src->charset = Parrot_iso_8859_1_charset_ptr;
+ return src;
+}
/*
* Local variables:
* c-indentation-style: bsd
1.11 +5 -1 parrot/charset/ascii.h
Index: ascii.h
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- ascii.h 1 Mar 2005 14:19:45 -0000 1.10
+++ ascii.h 1 Mar 2005 15:41:25 -0000 1.11
@@ -1,7 +1,7 @@
/* ascii.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: ascii.h,v 1.10 2005/03/01 14:19:45 leo Exp $
+ * $Id: ascii.h,v 1.11 2005/03/01 15:41:25 leo Exp $
* Overview:
* This is the header for the ascii charset functions
* Data Structure and Algorithms:
@@ -39,6 +39,8 @@
INTVAL ascii_cs_rindex(Interp *, const STRING *source_string,
const STRING *search_string, UINTVAL offset);
size_t ascii_compute_hash(Interp *, STRING *source_string);
+STRING * ascii_to_unicode(Interp *, STRING *source_string, STRING *dest);
+STRING * ascii_to_charset(Interp *, STRING *src, CHARSET *new_cs, STRING *dest);
static void compose(Interp *, STRING *source_string);
static void decompose(Interp *, STRING *source_string);
@@ -63,6 +65,8 @@
static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);
CHARSET *Parrot_charset_ascii_init(Interp *);
+STRING *charset_cvt_ascii_to_binary(Interp *, STRING *src, STRING *dest);
+STRING *charset_cvt_ascii_to_iso_8859_1(Interp *, STRING *src, STRING *dest);
#endif /* PARROT_CHARSET_ASCII_H_GUARD */
/*
1.11 +3 -27 parrot/charset/iso-8859-1.c
Index: iso-8859-1.c
===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- iso-8859-1.c 1 Mar 2005 14:19:45 -0000 1.10
+++ iso-8859-1.c 1 Mar 2005 15:41:25 -0000 1.11
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: iso-8859-1.c,v 1.10 2005/03/01 14:19:45 leo Exp $
+$Id: iso-8859-1.c,v 1.11 2005/03/01 15:41:25 leo Exp $
=head1 NAME
@@ -81,30 +81,6 @@
}
-static STRING *
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
- internal_exception(UNIMPLEMENTED,
- "to_unicode for iso-8859-1 not implemented");
- return NULL;
-}
-
-static STRING *
-to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING
*dest)
-{
- charset_converter_t conversion_func;
-
- if ((conversion_func = Parrot_find_charset_converter(interpreter,
- src->charset, new_charset))) {
- return conversion_func(interpreter, src, dest);
- }
- else {
- STRING *res = to_unicode(interpreter, src, dest);
- return new_charset->from_charset(interpreter, res, dest);
-
- }
-}
-
/* A noop. can't compose iso-8859-1 */
static void
compose(Interp *interpreter, STRING *source_string)
@@ -369,8 +345,8 @@
ascii_get_graphemes,
ascii_get_graphemes_inplace,
set_graphemes,
- to_charset,
- to_unicode,
+ ascii_to_charset,
+ ascii_to_unicode,
from_charset,
from_unicode,
compose,
1.10 +31 -5 parrot/src/charset.c
Index: charset.c
===================================================================
RCS file: /cvs/public/parrot/src/charset.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- charset.c 1 Mar 2005 14:19:48 -0000 1.9
+++ charset.c 1 Mar 2005 15:41:26 -0000 1.10
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: charset.c,v 1.9 2005/03/01 14:19:48 leo Exp $
+$Id: charset.c,v 1.10 2005/03/01 15:41:26 leo Exp $
=head1 NAME
@@ -15,6 +15,7 @@
#define PARROT_NO_EXTERN_CHARSET_PTRS
#include "parrot/parrot.h"
#include "../charset/iso-8859-1.h"
+#include "../charset/ascii.h"
CHARSET *Parrot_iso_8859_1_charset_ptr;
CHARSET *Parrot_binary_charset_ptr;
@@ -59,6 +60,8 @@
n = all_charsets->n_charsets;
for (i = 0; i < n; ++i) {
+ if (all_charsets->set[i].n_converters)
+ mem_sys_free(all_charsets->set[i].to_converters);
mem_sys_free(all_charsets->set[i].charset);
}
mem_sys_free(all_charsets->set);
@@ -174,6 +177,24 @@
return 1;
}
+static void
+register_static_converters(Interp *interpreter)
+{
+ Parrot_register_charset_converter(interpreter,
+ Parrot_iso_8859_1_charset_ptr, Parrot_ascii_charset_ptr,
+ charset_cvt_iso_8859_1_to_ascii);
+ Parrot_register_charset_converter(interpreter,
+ Parrot_iso_8859_1_charset_ptr, Parrot_binary_charset_ptr,
+ charset_cvt_ascii_to_binary);
+
+ Parrot_register_charset_converter(interpreter,
+ Parrot_ascii_charset_ptr, Parrot_binary_charset_ptr,
+ charset_cvt_ascii_to_binary);
+ Parrot_register_charset_converter(interpreter,
+ Parrot_ascii_charset_ptr, Parrot_iso_8859_1_charset_ptr,
+ charset_cvt_ascii_to_iso_8859_1);
+}
+
INTVAL
Parrot_register_charset(Interp *interpreter, const char *charsetname,
CHARSET *charset)
@@ -199,11 +220,16 @@
return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("ascii", charsetname)) {
+ INTVAL result;
+
Parrot_ascii_charset_ptr = charset;
- Parrot_register_charset_converter(interpreter,
- Parrot_iso_8859_1_charset_ptr, charset,
- charset_cvt_iso_8859_1_to_ascii);
- return register_charset(interpreter, charsetname, charset);
+ result = register_charset(interpreter, charsetname, charset);
+ /*
+ * ascii is currently the last charset - so we can
+ * now install charset converters
+ */
+ register_static_converters(interpreter);
+ return result;
}
return 0;
}
1.7 +98 -2 parrot/t/op/string_cs.t
Index: string_cs.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string_cs.t,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- string_cs.t 1 Mar 2005 14:19:49 -0000 1.6
+++ string_cs.t 1 Mar 2005 15:41:31 -0000 1.7
@@ -1,6 +1,6 @@
#! perl -w
# Copyright: 2001-2004 The Perl Foundation. All Rights Reserved.
-# $Id: string_cs.t,v 1.6 2005/03/01 14:19:49 leo Exp $
+# $Id: string_cs.t,v 1.7 2005/03/01 15:41:31 leo Exp $
=head1 NAME
@@ -16,7 +16,7 @@
=cut
-use Parrot::Test tests => 20;
+use Parrot::Test tests => 26;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -305,3 +305,99 @@
ascii
OUTPUT
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to binary");
+ set S0, "abc"
+ find_charset I0, "binary"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i iso-8859-1 to binary");
+ set S1, "abc"
+ find_charset I0, "binary"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to binary");
+ set S0, ascii:"abc"
+ find_charset I0, "binary"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to binary");
+ set S1, ascii:"abc"
+ find_charset I0, "binary"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to iso-8859-1");
+ set S0, ascii:"abc"
+ find_charset I0, "iso-8859-1"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+iso-8859-1
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to iso-8859-1");
+ set S1, ascii:"abc"
+ find_charset I0, "iso-8859-1"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+iso-8859-1
+OUTPUT
+