cvsuser 03/11/15 03:11:28
Modified: encodings dbcs.c singlebyte.c utf16.c utf32.c utf8.c
include/parrot encoding.h string.h string_funcs.h
src string.c
Log:
Add string iterator function set_position; currently used in string_chopn
Revision Changes Path
1.3 +18 -2 parrot/encodings/dbcs.c
Index: dbcs.c
===================================================================
RCS file: /cvs/public/parrot/encodings/dbcs.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -w -r1.2 -r1.3
--- dbcs.c 14 Nov 2003 20:26:38 -0000 1.2
+++ dbcs.c 15 Nov 2003 11:11:20 -0000 1.3
@@ -1,7 +1,7 @@
/* dbcs.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: dbcs.c,v 1.2 2003/11/14 20:26:38 petergibbs Exp $
+ * $Id: dbcs.c,v 1.3 2003/11/15 11:11:20 petergibbs Exp $
* Overview:
* This defines the DBCS encoding routines.
* Data Structure and Algorithms:
@@ -106,6 +106,21 @@
}
}
+/*
+ * dbcs_set_position: move iterator i to character index pos, counted from
+ * the start of i->str.  Stores pos in i->charpos and the matching byte
+ * offset in i->bytepos.  A byte above 127 is taken as the lead byte of a
+ * two-byte character; any other byte is a one-byte character.
+ * The scan is not bounded by the buffer length, so pos must not exceed the
+ * number of characters in the string.
+ */
+static void
+dbcs_set_position(struct string_iterator_t *i, Parrot_Int pos)
+{
+    /* Cast to the pointer's own type rather than to char *. */
+    const byte_t *bptr = (const byte_t *)i->str->strstart;
+
+    i->charpos = pos;
+    /* Test "> 0", as the utf8/utf16 versions do: with a plain non-zero
+     * test a negative pos would walk far beyond the end of the buffer. */
+    while (pos-- > 0) {
+        if (*bptr > 127)
+            bptr += 2;
+        else
+            bptr++;
+    }
+    i->bytepos = (const char *)bptr - (const char *)i->str->strstart;
+}
+
const ENCODING dbcs_encoding = {
enum_encoding_dbcs,
"dbcs",
@@ -115,7 +130,8 @@
dbcs_encode,
dbcs_skip_forward,
dbcs_skip_backward,
- dbcs_decode_and_advance
+ dbcs_decode_and_advance,
+ dbcs_set_position
};
/*
1.18 +9 -2 parrot/encodings/singlebyte.c
Index: singlebyte.c
===================================================================
RCS file: /cvs/public/parrot/encodings/singlebyte.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -w -r1.17 -r1.18
--- singlebyte.c 14 Nov 2003 20:26:40 -0000 1.17
+++ singlebyte.c 15 Nov 2003 11:11:20 -0000 1.18
@@ -1,7 +1,7 @@
/* singlebyte.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: singlebyte.c,v 1.17 2003/11/14 20:26:40 petergibbs Exp $
+ * $Id: singlebyte.c,v 1.18 2003/11/15 11:11:20 petergibbs Exp $
* Overview:
* This defines the single byte encoding routines.
* Data Structure and Algorithms:
@@ -68,6 +68,12 @@
return *ptr;
}
+/*
+ * singlebyte_set_position: move iterator i to character index pos.
+ * In this encoding the byte offset is the same number as the character
+ * index, so both fields receive the same value.
+ */
+static void
+singlebyte_set_position(struct string_iterator_t *i, Parrot_Int pos)
+{
+    i->charpos = pos;
+    i->bytepos = i->charpos;
+}
+
const ENCODING singlebyte_encoding = {
enum_encoding_singlebyte,
"singlebyte",
@@ -77,7 +83,8 @@
singlebyte_encode,
singlebyte_skip_forward,
singlebyte_skip_backward,
- singlebyte_decode_and_advance
+ singlebyte_decode_and_advance,
+ singlebyte_set_position
};
/*
1.15 +29 -3 parrot/encodings/utf16.c
Index: utf16.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf16.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -w -r1.14 -r1.15
--- utf16.c 14 Nov 2003 20:26:40 -0000 1.14
+++ utf16.c 15 Nov 2003 11:11:20 -0000 1.15
@@ -1,7 +1,7 @@
/* utf16.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: utf16.c,v 1.14 2003/11/14 20:26:40 petergibbs Exp $
+ * $Id: utf16.c,v 1.15 2003/11/15 11:11:20 petergibbs Exp $
* Overview:
* This defines the UTF-16 encoding routines.
* Data Structure and Algorithms:
@@ -134,7 +134,7 @@
static UINTVAL
utf16_decode_and_advance(struct string_iterator_t *i)
{
- const utf16_t *u16ptr = (char *)i->str->strstart + i->bytepos;
+ const utf16_t *u16ptr = (utf16_t *)((char *)i->str->strstart + i->bytepos);
UINTVAL c = *u16ptr++;
if (UNICODE_IS_HIGH_SURROGATE(c)) {
@@ -159,6 +159,31 @@
return c;
}
+/*
+ * utf16_set_position: move iterator i to character index pos by scanning
+ * forward from the start of i->str.  Stores pos in i->charpos and the
+ * resulting byte offset in i->bytepos.  Raises MALFORMED_UTF16 through
+ * internal_exception when a high surrogate is not followed by a low
+ * surrogate, or when a low surrogate appears on its own.
+ *
+ * XXX Should use quickest direction
+ */
+static void
+utf16_set_position(struct string_iterator_t *i, Parrot_Int pos)
+{
+    const utf16_t *cursor = (utf16_t *)i->str->strstart;
+    Parrot_Int remaining;
+
+    i->charpos = pos;
+    for (remaining = pos; remaining > 0; remaining--) {
+        if (UNICODE_IS_HIGH_SURROGATE(*cursor)) {
+            /* Step onto the second unit of the pair and validate it. */
+            cursor++;
+            if (!UNICODE_IS_LOW_SURROGATE(*cursor)) {
+                internal_exception(MALFORMED_UTF16,
+                                   "Malformed UTF-16 surrogate\n");
+            }
+        }
+        else if (UNICODE_IS_LOW_SURROGATE(*cursor)) {
+            /* Low surrogate with no high surrogate in front of it. */
+            internal_exception(MALFORMED_UTF16,
+                               "Malformed UTF-16 surrogate\n");
+        }
+        cursor++;
+    }
+    i->bytepos = (const char *)cursor - (const char *)i->str->strstart;
+}
+
const ENCODING utf16_encoding = {
enum_encoding_utf16,
"utf16",
@@ -168,7 +193,8 @@
utf16_encode,
utf16_skip_forward,
utf16_skip_backward,
- utf16_decode_and_advance
+ utf16_decode_and_advance,
+ utf16_set_position
};
/*
1.13 +10 -2 parrot/encodings/utf32.c
Index: utf32.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf32.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -w -r1.12 -r1.13
--- utf32.c 14 Nov 2003 20:26:40 -0000 1.12
+++ utf32.c 15 Nov 2003 11:11:20 -0000 1.13
@@ -1,7 +1,7 @@
/* utf32.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: utf32.c,v 1.12 2003/11/14 20:26:40 petergibbs Exp $
+ * $Id: utf32.c,v 1.13 2003/11/15 11:11:20 petergibbs Exp $
* Overview:
* This defines the UTF-32 encoding routines.
* Data Structure and Algorithms:
@@ -73,6 +73,13 @@
return *u32ptr;
}
+/*
+ * utf32_set_position: move iterator i to character index pos.
+ * The byte offset is computed directly as four bytes per character, so no
+ * scan of the string is needed.
+ */
+static void
+utf32_set_position(struct string_iterator_t *i, Parrot_Int pos)
+{
+    i->bytepos = pos * 4;
+    i->charpos = pos;
+}
+
const ENCODING utf32_encoding = {
enum_encoding_utf32,
"utf32",
@@ -82,7 +89,8 @@
utf32_encode,
utf32_skip_forward,
utf32_skip_backward,
- utf32_decode_and_advance
+ utf32_decode_and_advance,
+ utf32_set_position
};
/*
1.16 +17 -5 parrot/encodings/utf8.c
Index: utf8.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf8.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -w -r1.15 -r1.16
--- utf8.c 14 Nov 2003 20:26:40 -0000 1.15
+++ utf8.c 15 Nov 2003 11:11:20 -0000 1.16
@@ -1,7 +1,7 @@
/* utf8.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: utf8.c,v 1.15 2003/11/14 20:26:40 petergibbs Exp $
+ * $Id: utf8.c,v 1.16 2003/11/15 11:11:20 petergibbs Exp $
* Overview:
* This defines the UTF-8 encoding routines.
* Data Structure and Algorithms:
@@ -136,16 +136,14 @@
static UINTVAL
utf8_decode_and_advance(struct string_iterator_t *i)
{
- const utf8_t *u8ptr = (char *)i->str->strstart + i->bytepos;
+ const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);
UINTVAL c = *u8ptr;
if (UTF8_IS_START(c)) {
UINTVAL len = UTF8SKIP(u8ptr);
- UINTVAL count;
c &= UTF8_START_MASK(len);
i->bytepos += len;
-// for (count = 1; count < len; count++) {
for (len--; len; len--) {
u8ptr++;
if (!UTF8_IS_CONTINUATION(*u8ptr)) {
@@ -169,6 +167,19 @@
return c;
}
+/*
+ * utf8_set_position: move iterator i to character index pos by scanning
+ * forward from the start of i->str, advancing by UTF8SKIP() bytes for each
+ * character.  Stores pos in i->charpos and the resulting byte offset in
+ * i->bytepos.  The scan is not bounded by the buffer length and does not
+ * validate continuation bytes, so pos must not exceed the number of
+ * characters in the string.
+ *
+ * XXX Should use quickest direction
+ */
+static void
+utf8_set_position(struct string_iterator_t *i, Parrot_Int pos)
+{
+    /* Cast to the pointer's own type rather than to char *, matching the
+     * corrected cast in utf8_decode_and_advance. */
+    const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;
+
+    i->charpos = pos;
+    while (pos-- > 0) {
+        u8ptr += UTF8SKIP(u8ptr);
+    }
+    i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;
+}
+
const ENCODING utf8_encoding = {
enum_encoding_utf8,
"utf8",
@@ -178,7 +189,8 @@
utf8_encode,
utf8_skip_forward,
utf8_skip_backward,
- utf8_decode_and_advance
+ utf8_decode_and_advance,
+ utf8_set_position
};
/*
1.24 +2 -1 parrot/include/parrot/encoding.h
Index: encoding.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/encoding.h,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -w -r1.23 -r1.24
--- encoding.h 14 Nov 2003 20:26:55 -0000 1.23
+++ encoding.h 15 Nov 2003 11:11:24 -0000 1.24
@@ -1,7 +1,7 @@
/* encoding.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: encoding.h,v 1.23 2003/11/14 20:26:55 petergibbs Exp $
+ * $Id: encoding.h,v 1.24 2003/11/15 11:11:24 petergibbs Exp $
* Overview:
* This is the api header for the string encoding subsystem
* Data Structure and Algorithms:
@@ -37,6 +37,7 @@
const void *(*skip_forward) (const void *ptr, Parrot_UInt n);
const void *(*skip_backward) (const void *ptr, Parrot_UInt n);
Parrot_UInt(*decode_and_advance) (struct string_iterator_t *i);
+ void (*set_position)(struct string_iterator_t *i, Parrot_Int pos);
};
typedef struct parrot_encoding_t* Parrot_Encoding;
1.56 +6 -5 parrot/include/parrot/string.h
Index: string.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string.h,v
retrieving revision 1.55
retrieving revision 1.56
diff -u -w -r1.55 -r1.56
--- string.h 14 Nov 2003 20:26:57 -0000 1.55
+++ string.h 15 Nov 2003 11:11:24 -0000 1.56
@@ -1,7 +1,7 @@
/* string.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: string.h,v 1.55 2003/11/14 20:26:57 petergibbs Exp $
+ * $Id: string.h,v 1.56 2003/11/15 11:11:24 petergibbs Exp $
* Overview:
* This is the api header for the string subsystem
* Data Structure and Algorithms:
@@ -41,6 +41,7 @@
UINTVAL bytepos;
UINTVAL charpos;
UINTVAL (*decode_and_advance)(struct string_iterator_t *i);
+ void (*set_position)(struct string_iterator_t *i, INTVAL pos);
} string_iterator;
/* stringinfo parameters */
1.31 +2 -1 parrot/include/parrot/string_funcs.h
Index: string_funcs.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -w -r1.30 -r1.31
--- string_funcs.h 14 Nov 2003 20:26:58 -0000 1.30
+++ string_funcs.h 15 Nov 2003 11:11:24 -0000 1.31
@@ -1,7 +1,7 @@
/* string_funcs.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: string_funcs.h,v 1.30 2003/11/14 20:26:58 petergibbs Exp $
+ * $Id: string_funcs.h,v 1.31 2003/11/15 11:11:24 petergibbs Exp $
* Overview:
* This is the api header for the string subsystem
* Data Structure and Algorithms:
@@ -70,6 +70,7 @@
STRING *string_bitwise_xor(struct Parrot_Interp *interpreter, STRING *s1,
STRING *s2, STRING **dest);
void string_iterator_init(struct string_iterator_t *i, STRING *s);
+UINTVAL string_decode_and_advance(struct string_iterator_t *i);
#endif
1.159 +17 -19 parrot/src/string.c
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.158
retrieving revision 1.159
diff -u -w -r1.158 -r1.159
--- string.c 14 Nov 2003 20:27:02 -0000 1.158
+++ string.c 15 Nov 2003 11:11:27 -0000 1.159
@@ -1,7 +1,7 @@
/* string.c
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: string.c,v 1.158 2003/11/14 20:27:02 petergibbs Exp $
+ * $Id: string.c,v 1.159 2003/11/15 11:11:27 petergibbs Exp $
* Overview:
* This is the api definitions for the string subsystem
* Data Structure and Algorithms:
@@ -901,30 +901,26 @@
STRING *
string_chopn(STRING *s, INTVAL n)
{
- const char *strstart = s->strstart;
- const char *bufend = strstart + s->bufused;
- UINTVAL true_n;
+ UINTVAL new_length;
+ struct string_iterator_t it;
- true_n = (UINTVAL)n;
if (n < 0) {
- n = -n;
- true_n = (UINTVAL)n;
- if (true_n > s->strlen)
- true_n = s->strlen;
- bufend = s->encoding->skip_forward(strstart, true_n);
- s->bufused = bufend - strstart;
- s->strlen = true_n;
+ new_length = -n;
+ if (new_length > s->strlen)
+ return s;
}
else {
- if (true_n > s->strlen)
- true_n = s->strlen;
-
- bufend = s->encoding->skip_backward(bufend, true_n);
-
- s->bufused = bufend - strstart;
- s->strlen = s->strlen - true_n;
+ if (s->strlen > (UINTVAL)n)
+ new_length = s->strlen - n;
+ else
+ new_length = 0;
}
+ string_iterator_init(&it, s);
+ if (new_length > 0)
+ it.set_position(&it, new_length);
+ s->strlen = new_length;
+ s->bufused = it.bytepos;
return s;
}
@@ -1655,7 +1651,9 @@
i->str = s;
i->bytepos = 0;
i->charpos = 0;
+ /* XXX change this to a bulk copy */
i->decode_and_advance = s->encoding->decode_and_advance;
+ i->set_position = s->encoding->set_position;
}
/*