All --
My computer uses the ISO8859-1 encoding natively, and I figure we
eventually want to be able to transcode from whatever the specific
native encoding is to Unicode. So, I copied strnative.* and did
some basic editing to produce striso8859_1.*.
The problem is, what I've done really doesn't deal with the issues.
I may be misunderstanding, but I think 'strnative' needs to go away
and we need to determine the precise native encoding, and call the
string encodings what they are. Data management of single byte
character sets (SBCS) could presumably be shared, but I would
expect that in many cases comparisons would vary. So, I wonder if
strnative should really be some support routines usable by
str*, when * is a SBCS. This sort of thing will surely be needed
when we start thinking seriously about portable byte code, since
loading up some bytecode with a string constant tagged as being
encoded "native" doesn't allow the reader to know what to do
with it. I think "native" is a predicate that tells us whether
the string in question was encoded with the *current* platform's
native encoding or not, not some inherent property of the string.
I have the Unicode 3.0 book and CD, and there are transcoding
tables there for ISO8859-X to Unicode, which presumably we can add
to CVS and transform into C arrays with a simple program. I'm
thinking we aren't yet ready for that, but I could drop them in
somewhere in advance if desired.
The current setup doesn't support dynamically loaded encodings,
which I think we'll want to do sometime soon (since doing that will
make it easier to tinker with multiple encodings).
Bottom line on this stuff is that I'm attaching the patch, but I
think some combination of enhancing my understanding of the intent
of what's already there and enhancing what is there would be required
for me to turn this into something actually useful.
Regards,
-- Gregor
_____________________________________________________________________
/ perl -e 'srand(-2091643526); print chr rand 90 for (0..4)' \
Gregor N. Purdy [EMAIL PROTECTED]
Focus Research, Inc. http://www.focusresearch.com/
8080 Beckett Center Drive #203 513-860-3570 vox
West Chester, OH 45069 513-860-3579 fax
\_____________________________________________________________________/
? examples/assembly/queens.pasm
? languages/jako/queens.jako
Index: MANIFEST
===================================================================
RCS file: /home/perlcvs/parrot/MANIFEST,v
retrieving revision 1.36
diff -a -u -r1.36 MANIFEST
--- MANIFEST 2001/10/15 21:37:07 1.36
+++ MANIFEST 2001/10/17 12:08:49
@@ -38,6 +38,7 @@
include/parrot/register.h
include/parrot/stacks.h
include/parrot/string.h
+include/parrot/striso8859_1.h
include/parrot/strnative.h
include/parrot/strutf16.h
include/parrot/strutf32.h
@@ -82,6 +83,7 @@
register.c
stacks.c
string.c
+striso8859_1.c
strnative.c
strutf16.c
strutf32.c
Index: Makefile.in
===================================================================
RCS file: /home/perlcvs/parrot/Makefile.in,v
retrieving revision 1.29
diff -a -u -r1.29 Makefile.in
--- Makefile.in 2001/10/17 11:54:07 1.29
+++ Makefile.in 2001/10/17 12:08:49
@@ -4,7 +4,7 @@
INC=include/parrot
H_FILES = $(INC)/config.h $(INC)/exceptions.h $(INC)/io.h $(INC)/op.h
$(INC)/register.h $(INC)/string.h $(INC)/events.h $(INC)/interpreter.h $(INC)/memory.h
$(INC)/parrot.h $(INC)/stacks.h $(INC)/packfile.h $(INC)/global_setup.h
$(INC)/vtable.h $(INC)/oplib/core_ops.h
-O_FILES = global_setup$(O) interpreter$(O) parrot$(O) register$(O) core_ops$(O)
memory$(O) packfile$(O) stacks$(O) string$(O) strnative$(O) strutf8$(O) strutf16$(O)
strutf32$(O) transcode$(O)
+O_FILES = global_setup$(O) interpreter$(O) parrot$(O) register$(O) core_ops$(O)
+memory$(O) packfile$(O) stacks$(O) string$(O) striso8859_1$(O) strnative$(O)
+strutf8$(O) strutf16$(O) strutf32$(O) transcode$(O)
#DO NOT ADD C COMPILER FLAGS HERE
#Add them in Configure.pl--look for the
@@ -34,8 +34,8 @@
$(TEST_PROG): test_main$(O) $(O_FILES)
$(CC) $(CFLAGS) -o $(TEST_PROG) $(O_FILES) test_main$(O) $(C_LIBS)
-$(PDUMP): pdump$(O) packfile$(O) memory$(O) global_setup$(O) string$(O) strnative$(O)
strutf8$(O) strutf16$(O) strutf32$(O) transcode$(O)
- $(CC) $(CFLAGS) -o $(PDUMP) pdump$(O) packfile$(O) memory$(O) global_setup$(O)
string$(O) strnative$(O) strutf8$(O) strutf16$(O) strutf32$(O) transcode$(O) $(C_LIBS)
+$(PDUMP): pdump$(O) packfile$(O) memory$(O) global_setup$(O) string$(O)
+striso8859_1$(O) strnative$(O) strutf8$(O) strutf16$(O) strutf32$(O) transcode$(O)
+ $(CC) $(CFLAGS) -o $(PDUMP) pdump$(O) packfile$(O) memory$(O) global_setup$(O)
+string$(O) striso8859_1$(O) strnative$(O) strutf8$(O) strutf16$(O) strutf32$(O)
+transcode$(O) $(C_LIBS)
test_main$(O): $(H_FILES)
@@ -57,6 +57,8 @@
global_setup$(O): $(H_FILES)
string$(O): $(H_FILES)
+
+striso8859_1$(O): $(H_FILES)
strnative$(O): $(H_FILES)
Index: string.c
===================================================================
RCS file: /home/perlcvs/parrot/string.c,v
retrieving revision 1.13
diff -a -u -r1.13 string.c
--- string.c 2001/10/10 18:21:04 1.13
+++ string.c 2001/10/17 12:08:49
@@ -19,10 +19,11 @@
*/
void
string_init(void) {
- Parrot_string_vtable[enc_native] = string_native_vtable();
- Parrot_string_vtable[enc_utf8] = string_utf8_vtable();
- Parrot_string_vtable[enc_utf16] = string_utf16_vtable();
- Parrot_string_vtable[enc_utf32] = string_utf32_vtable();
+ Parrot_string_vtable[enc_native] = string_native_vtable();
+ Parrot_string_vtable[enc_utf8] = string_utf8_vtable();
+ Parrot_string_vtable[enc_utf16] = string_utf16_vtable();
+ Parrot_string_vtable[enc_utf32] = string_utf32_vtable();
+ Parrot_string_vtable[enc_iso8859_1] = string_iso8859_1_vtable();
}
/*=for api string string_make
Index: striso8859_1.c
===================================================================
RCS file: striso8859_1.c
diff -N striso8859_1.c
--- /dev/null Wed Oct 17 03:32:26 2001
+++ striso8859_1.c Wed Oct 17 05:08:49 2001
@@ -0,0 +1,129 @@
+/* striso8859_1.c
+ * Copyright: (When this is determined...it will go here)
+ * CVS Info
+ * $Id: $
+ * Overview:
+ * This defines the iso8859_1 string routines.
+ * Data Structure and Algorithms:
+ * History:
+ * Notes:
+ * References:
+ */
+
+#include "parrot/parrot.h"
+
+/* Functions for handling strings in iso8859_1 byte format. */
+
+/*=for api string_iso8859_1 string_iso8859_1_compute_strlen
+ return the length of s, taken directly from s->bufused (one byte per character)
+*/
+static INTVAL
+string_iso8859_1_compute_strlen (STRING *s) {
+ return s->bufused;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_max_bytes
+ return the max bytes needed for x characters; this is x itself (one byte each)
+*/
+static INTVAL
+string_iso8859_1_max_bytes (INTVAL x) {
+ return x;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_concat
+ append b's bytes to the end of a in place (after calling string_grow on a) and return a
+*/
+static STRING*
+string_iso8859_1_concat(STRING* a, STRING* b, INTVAL flags) {
+ if (flags && a->encoding != b->encoding) {
+ /* Transcode b into a fresh iso8859_1 string; only done when flags is non-zero */
+ STRING* t = b;
+ b = string_make(NULL, 0, enc_iso8859_1, 0, 0);
+ (Parrot_transcode_table[t->encoding->which][enc_iso8859_1])(t, b);
+ }
+ /* b is assumed to be iso8859_1 here. NOTE(review): not so if flags was 0 and encodings differ - confirm */
+ string_grow(a, a->strlen + b->strlen);
+ mem_sys_memcopy((void*)((ptrcast_t)a->bufstart + a->strlen), b->bufstart,
b->strlen);
+ a->strlen = a->bufused = a->strlen + b->strlen;
+ return a;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_chopn
+ remove the last n characters from s by reducing bufused and strlen; n is not range-checked here
+*/
+static STRING*
+string_iso8859_1_chopn(STRING* s, INTVAL n) {
+ s->bufused -= n;
+ s->strlen -= n;
+ return s;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_substr
+ copy length characters from src, starting at offset, into the start of dest,
+ after retagging dest as iso8859_1 and calling string_grow on it. Return dest
+*/
+static STRING*
+string_iso8859_1_substr(STRING* src, INTVAL offset, INTVAL length, STRING* dest)
+{
+ if (dest->encoding->which != enc_iso8859_1) {
+ /* dest's length fields are overwritten below, so just point it at the iso8859_1 vtable */
+ dest->encoding = &(Parrot_string_vtable[enc_iso8859_1]);
+ }
+
+ /* Offset and length have already been "normalized"; no bounds checks are done here */
+ string_grow(dest, length);
+ mem_sys_memcopy(dest->bufstart, (void*)((ptrcast_t)src->bufstart + offset),
length);
+ dest->strlen = dest->bufused = length;
+
+ return dest;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_compare
+ compare two strings bytewise: <0, 0 or >0; on a tie over the shorter length the shorter string sorts first
+*/
+static INTVAL
+string_iso8859_1_compare(STRING* s1, STRING* s2) {
+ INTVAL cmp;
+
+ if (s1->bufused < s2->bufused) {
+ cmp = memcmp(s1->bufstart, s2->bufstart, s1->bufused);
+ if (cmp == 0) {
+ cmp = -1; /* s1 is a proper prefix of s2 */
+ }
+ }
+ else {
+ cmp = memcmp(s1->bufstart, s2->bufstart, s2->bufused);
+ if (cmp == 0 && s1->bufused > s2->bufused) {
+ cmp = 1; /* s2 is a proper prefix of s1 */
+ }
+ }
+
+ return cmp;
+}
+
+/*=for api string_iso8859_1 string_iso8859_1_vtable
+ return (by value) the vtable for the iso8859_1 string: the encoding tag plus this file's six static routines
+*/
+STRING_VTABLE
+string_iso8859_1_vtable (void) {
+ STRING_VTABLE sv = {
+ enc_iso8859_1,
+ string_iso8859_1_compute_strlen,
+ string_iso8859_1_max_bytes,
+ string_iso8859_1_concat,
+ string_iso8859_1_chopn,
+ string_iso8859_1_substr,
+ string_iso8859_1_compare,
+ };
+ return sv;
+}
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/
Index: include/parrot/string.h
===================================================================
RCS file: /home/perlcvs/parrot/include/parrot/string.h,v
retrieving revision 1.5
diff -a -u -r1.5 string.h
--- include/parrot/string.h 2001/10/10 18:21:04 1.5
+++ include/parrot/string.h 2001/10/17 12:08:49
@@ -16,11 +16,14 @@
typedef struct parrot_string STRING;
typedef struct string_vtable STRING_VTABLE;
+/* TODO: This approach thwarts loading new encodings at run time */
+
typedef enum {
enc_native,
enc_utf8,
enc_utf16,
enc_utf32,
+ enc_iso8859_1,
enc_foreign,
enc_max
} encoding_t;
@@ -92,6 +95,7 @@
#include "parrot/strutf8.h"
#include "parrot/strutf16.h"
#include "parrot/strutf32.h"
+#include "parrot/striso8859_1.h"
#endif
/*
Index: include/parrot/striso8859_1.h
===================================================================
RCS file: striso8859_1.h
diff -N striso8859_1.h
--- /dev/null Wed Oct 17 03:32:26 2001
+++ striso8859_1.h Wed Oct 17 05:08:49 2001
@@ -0,0 +1,29 @@
+/* striso8859_1.h
+ * Copyright: (When this is determined...it will go here)
+ * CVS Info
+ * $Id: $
+ * Overview:
+ * ISO8859-1 string handling functions header
+ * Data Structure and Algorithms:
+ * History:
+ * Notes:
+ * References:
+ */
+
+#if !defined(PARROT_STRISO8859_1_H_GUARD)
+#define PARROT_STRISO8859_1_H_GUARD
+
+STRING_VTABLE
+string_iso8859_1_vtable (void); /* returns the iso8859_1 string vtable by value */
+
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/