cvsuser 05/03/02 02:43:16
Modified: charset ascii.c
encodings fixed_8.c utf8.c
include/parrot encoding.h string.h
src string.c
Log:
Strings. Finally. 12 - new iter_init encoding function
* adapt and generalize the iterator stuff from utf8
* use it in ascii charset
Revision Changes Path
1.17 +13 -5 parrot/charset/ascii.c
Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- ascii.c 2 Mar 2005 09:03:25 -0000 1.16
+++ ascii.c 2 Mar 2005 10:43:13 -0000 1.17
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.16 2005/03/02 09:03:25 leo Exp $
+$Id: ascii.c,v 1.17 2005/03/02 10:43:13 leo Exp $
=head1 NAME
@@ -107,13 +107,16 @@
from_charset(Interp *interpreter, STRING *src, STRING *dest)
{
UINTVAL offs, c;
+ String_iter iter;
+
if (dest) {
Parrot_reallocate_string(interpreter, dest, src->strlen);
dest->bufused = src->strlen;
dest->strlen = src->strlen;
}
+ ENCODING_ITER_INIT(interpreter, src, &iter);
for (offs = 0; offs < src->strlen; ++offs) {
- c = ENCODING_GET_CODEPOINT(interpreter, src, offs);
+ c = iter.get_and_advance(interpreter, &iter);
if (c >= 0x80) {
EXCEPTION(LOSSY_CONVERSION, "lossy conversion to ascii");
}
@@ -142,7 +145,8 @@
}
STRING *
-ascii_to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
+ascii_to_charset(Interp *interpreter, STRING *src,
+ CHARSET *new_charset, STRING *dest)
{
charset_converter_t conversion_func;
@@ -265,6 +269,7 @@
{
INTVAL retval;
UINTVAL offs, l_len, r_len, min_len;
+ String_iter iter;
l_len = lhs->strlen;
r_len = rhs->strlen;
@@ -276,9 +281,10 @@
}
else {
UINTVAL cl, cr;
+ ENCODING_ITER_INIT(interpreter, rhs, &iter);
for (offs = 0; offs < min_len; ++offs) {
cl = ENCODING_GET_BYTE(interpreter, lhs, offs);
- cr = ENCODING_GET_CODEPOINT(interpreter, rhs, offs);
+ cr = iter.get_and_advance(interpreter, &iter);
retval = cl - cr;
if (retval)
break;
@@ -336,9 +342,11 @@
validate(Interp *interpreter, STRING *src)
{
UINTVAL codepoint, offset;
+ String_iter iter;
+ ENCODING_ITER_INIT(interpreter, src, &iter);
for (offset = 0; offset < string_length(interpreter, src); ++offset) {
- codepoint = ENCODING_GET_CODEPOINT(interpreter, src, offset);
+ codepoint = iter.get_and_advance(interpreter, &iter);
if (codepoint >= 0x80)
return 0;
}
1.10 +41 -2 parrot/encodings/fixed_8.c
Index: fixed_8.c
===================================================================
RCS file: /cvs/public/parrot/encodings/fixed_8.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- fixed_8.c 2 Mar 2005 09:03:28 -0000 1.9
+++ fixed_8.c 2 Mar 2005 10:43:14 -0000 1.10
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: fixed_8.c,v 1.9 2005/03/02 09:03:28 leo Exp $
+$Id: fixed_8.c,v 1.10 2005/03/02 10:43:14 leo Exp $
=head1 NAME
@@ -164,6 +164,43 @@
return source_string->bufused;
}
+/*
+ * iterator functions
+ */
+
+static UINTVAL
+fixed8_get_next(Interp *interpreter, String_iter *iter)
+{
+ UINTVAL c = get_byte(interpreter, iter->str, iter->charpos++);
+ iter->bytepos++;
+ return c;
+}
+
+static void
+fixed8_set_next(Interp *interpreter, String_iter *iter, UINTVAL c)
+{
+ set_byte(interpreter, iter->str, iter->charpos++, c);
+ iter->bytepos++;
+}
+
+static void
+fixed8_set_position(Interp *interpreter, String_iter *iter, UINTVAL pos)
+{
+ iter->bytepos = iter->charpos = pos;
+ assert(pos < PObj_buflen(iter->str));
+}
+
+
+static void
+iter_init(Interp *interpreter, String *src, String_iter *iter)
+{
+ iter->str = src;
+ iter->bytepos = iter->charpos = 0;
+ iter->get_and_advance = fixed8_get_next;
+ iter->set_and_advance = fixed8_set_next;
+ iter->set_position = fixed8_set_position;
+}
+
ENCODING *
Parrot_encoding_fixed_8_init(Interp *interpreter)
{
@@ -186,7 +223,9 @@
set_bytes,
become_encoding,
codepoints,
- bytes
+ bytes,
+ iter_init
+
};
memcpy(return_encoding, &base_encoding, sizeof(ENCODING));
Parrot_register_encoding(interpreter, "fixed_8", return_encoding);
1.22 +36 -8 parrot/encodings/utf8.c
Index: utf8.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf8.c,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- utf8.c 2 Mar 2005 09:03:28 -0000 1.21
+++ utf8.c 2 Mar 2005 10:43:14 -0000 1.22
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: utf8.c,v 1.21 2005/03/02 09:03:28 leo Exp $
+$Id: utf8.c,v 1.22 2005/03/02 10:43:14 leo Exp $
=head1 NAME
@@ -215,14 +215,18 @@
=head2 Iterator Functions
-String iteration is currently only used in C<hash_string_equal()>.
-
=over 4
=item C<static UINTVAL
-utf8_decode_and_advance(struct string_iterator_t *i)>
+utf8_decode_and_advance(Interp *, String_iter *i)>
+
+The UTF-8 implementation of the string iterator's C<get_and_advance>
+function.
+
+=item C<static void
+utf8_encode_and_advance(Interp *, String_iter *i, UINTVAL c)>
-The UTF-8 implementation of the string iterator's C<decode_and_advance>
+The UTF-8 implementation of the string iterator's C<set_and_advance>
function.
=cut
@@ -230,7 +234,7 @@
*/
static UINTVAL
-utf8_decode_and_advance(struct string_iterator_t *i)
+utf8_decode_and_advance(Interp *interpreter, String_iter *i)
{
const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);
UINTVAL c = *u8ptr;
@@ -263,6 +267,19 @@
return c;
}
+static void
+utf8_encode_and_advance(Interp *interpreter, String_iter *i, UINTVAL c)
+{
+ const STRING *s = i->str;
+ unsigned char *new_pos, *pos;
+
+ assert(i->bytepos < PObj_buflen(s) - 4);
+ pos = (unsigned char *)s->strstart + i->bytepos;
+ new_pos = utf8_encode(pos, c);
+ i->bytepos += (new_pos - pos);
+ i->charpos++;
+}
+
/*
=item C<func>
@@ -276,7 +293,7 @@
/* XXX Should use quickest direction */
static void
-utf8_set_position(struct string_iterator_t *i, Parrot_Int pos)
+utf8_set_position(Interp *interpreter, String_iter *i, UINTVAL pos)
{
const utf8_t *u8ptr = (utf8_t *)i->str->strstart;
@@ -426,6 +443,16 @@
return src->bufused;
}
+static void
+iter_init(Interp *interpreter, String *src, String_iter *iter)
+{
+ iter->str = src;
+ iter->bytepos = iter->charpos = 0;
+ iter->get_and_advance = utf8_decode_and_advance;
+ iter->set_and_advance = utf8_encode_and_advance;
+ iter->set_position = utf8_set_position;
+}
+
ENCODING *
Parrot_encoding_utf8_init(Interp *interpreter)
{
@@ -448,7 +475,8 @@
set_bytes,
become_encoding,
codepoints,
- bytes
+ bytes,
+ iter_init
};
memcpy(return_encoding, &base_encoding, sizeof(ENCODING));
Parrot_register_encoding(interpreter, "utf8", return_encoding);
1.34 +11 -1 parrot/include/parrot/encoding.h
Index: encoding.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/encoding.h,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -r1.33 -r1.34
--- encoding.h 2 Mar 2005 09:03:29 -0000 1.33
+++ encoding.h 2 Mar 2005 10:43:15 -0000 1.34
@@ -1,7 +1,7 @@
/* encoding.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: encoding.h,v 1.33 2005/03/02 09:03:29 leo Exp $
+ * $Id: encoding.h,v 1.34 2005/03/02 10:43:15 leo Exp $
* Overview:
* This is the header for the generic encoding functions
* Data Structure and Algorithms:
@@ -31,6 +31,13 @@
typedef UINTVAL (*encoding_codepoints_t)(Interp*, STRING *src);
typedef UINTVAL (*encoding_bytes_t)(Interp*, STRING *src);
+/* iterator support */
+
+struct string_iterator_t; /* s. parrot/string.h */
+
+typedef void (*encoding_iter_init_t)(Interp *, STRING *src,
+ struct string_iterator_t *);
+
struct _encoding {
const char *name;
UINTVAL max_bytes_per_codepoint;
@@ -49,6 +56,7 @@
encoding_become_encoding_t become_encoding;
encoding_codepoints_t codepoints;
encoding_bytes_t bytes;
+ encoding_iter_init_t iter_init;
};
typedef struct _encoding ENCODING;
@@ -104,6 +112,8 @@
((ENCODING *)src->encoding)->codepoints(i, src)
#define ENCODING_BYTES(i, src) \
((ENCODING *)src->encoding)->bytes(i, src)
+#define ENCODING_ITER_INIT(i, src, iter) \
+ ((ENCODING *)src->encoding)->iter_init(i, src, iter)
#endif /* PARROT_ENCODING_H_GUARD */
/*
1.61 +7 -4 parrot/include/parrot/string.h
Index: string.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string.h,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- string.h 11 Jun 2004 13:48:56 -0000 1.60
+++ string.h 2 Mar 2005 10:43:15 -0000 1.61
@@ -1,7 +1,7 @@
/* string.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: string.h,v 1.60 2004/06/11 13:48:56 nicholas Exp $
+ * $Id: string.h,v 1.61 2005/03/02 10:43:15 leo Exp $
* Overview:
* This is the api header for the string subsystem
* Data Structure and Algorithms:
@@ -38,9 +38,12 @@
const String *str;
UINTVAL bytepos;
UINTVAL charpos;
- UINTVAL (*decode_and_advance)(struct string_iterator_t *i);
- void (*set_position)(struct string_iterator_t *i, INTVAL pos);
-} string_iterator;
+ UINTVAL (*get_and_advance)(Interp *, struct string_iterator_t *i);
+ void (*set_and_advance)(Interp *, struct string_iterator_t *i, UINTVAL c);
+ void (*set_position)(Interp *, struct string_iterator_t *i, UINTVAL pos);
+} String_iter;
+
+void string_iter_init(Interp *, const String *str, String_iter *);
/* stringinfo parameters */
1.241 +1 -3 parrot/src/string.c
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.240
retrieving revision 1.241
diff -u -r1.240 -r1.241
--- string.c 2 Mar 2005 09:03:30 -0000 1.240
+++ string.c 2 Mar 2005 10:43:16 -0000 1.241
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: string.c,v 1.240 2005/03/02 09:03:30 leo Exp $
+$Id: string.c,v 1.241 2005/03/02 10:43:16 leo Exp $
=head1 NAME
@@ -900,9 +900,7 @@
INTVAL
string_compute_strlen(Interp *interpreter, STRING *s)
{
- /* taking advantage of int value of the enum */
s->strlen = CHARSET_CODEPOINTS(interpreter, s);
-
return s->strlen;
}