Author: leo
Date: Thu Nov 10 06:42:46 2005
New Revision: 9880
Modified:
trunk/encodings/fixed_8.c
trunk/encodings/ucs2.c
trunk/encodings/utf16.c
trunk/encodings/utf8.c
Log:
Unicode improvements - string_substr
* string_substr should now handle all charsets and encodings
* guard some uses of UChar that were outside of #if PARROT_HAS_ICU
Tests welcome.
Modified: trunk/encodings/fixed_8.c
==============================================================================
--- trunk/encodings/fixed_8.c (original)
+++ trunk/encodings/fixed_8.c Thu Nov 10 06:42:46 2005
@@ -118,8 +118,6 @@ get_bytes_inplace(Interp *interpreter, S
UINTVAL offset, UINTVAL count, STRING *return_string)
{
Parrot_reuse_COW_reference(interpreter, source_string, return_string);
- return_string->encoding = source_string->encoding;
- return_string->charset = source_string->charset;
return_string->strstart = (char *)return_string->strstart + offset ;
return_string->bufused = count;
Modified: trunk/encodings/ucs2.c
==============================================================================
--- trunk/encodings/ucs2.c (original)
+++ trunk/encodings/ucs2.c Thu Nov 10 06:42:46 2005
@@ -69,16 +69,27 @@ copy_to_encoding(Interp *interpreter, ST
static UINTVAL
get_codepoint(Interp *interpreter, const STRING *src, UINTVAL offset)
{
+#if PARROT_HAS_ICU
UChar *s = (UChar*) src->strstart;
return s[offset];
+#else
+ real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+ "no ICU lib loaded");
+ return 0;
+#endif
}
static void
set_codepoint(Interp *interpreter, STRING *src,
UINTVAL offset, UINTVAL codepoint)
{
+#if PARROT_HAS_ICU
UChar *s = (UChar*) src->strstart;
s[offset] = codepoint;
+#else
+ real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+ "no ICU lib loaded");
+#endif
}
static UINTVAL
@@ -99,18 +110,23 @@ static STRING *
get_codepoints(Interp *interpreter, STRING *src,
UINTVAL offset, UINTVAL count)
{
- String_iter iter;
- UINTVAL start;
- STRING *return_string = Parrot_make_COW_reference(interpreter,
- src);
- return_string->encoding = src->encoding;
- return_string->charset = src->charset;
- iter_init(interpreter, src, &iter);
- iter.set_position(interpreter, &iter, offset);
- start = iter.bytepos;
- return_string->strstart = (char *)return_string->strstart + start ;
- iter.set_position(interpreter, &iter, offset + count);
- return_string->bufused = iter.bytepos - start;
+ STRING *return_string = Parrot_make_COW_reference(interpreter, src);
+#if PARROT_HAS_ICU
+ return_string->strstart = (char*)src->strstart + offset * sizeof(UChar);
+ return_string->bufused = count * sizeof(UChar);
+#else
+ {
+ String_iter iter;
+ UINTVAL start;
+
+ iter_init(interpreter, src, &iter);
+ iter.set_position(interpreter, &iter, offset);
+ start = iter.bytepos;
+ return_string->strstart = (char *)return_string->strstart + start;
+ iter.set_position(interpreter, &iter, offset + count);
+ return_string->bufused = iter.bytepos - start;
+ }
+#endif
return_string->strlen = count;
return_string->hashval = 0;
return return_string;
Modified: trunk/encodings/utf16.c
==============================================================================
--- trunk/encodings/utf16.c (original)
+++ trunk/encodings/utf16.c Thu Nov 10 06:42:46 2005
@@ -194,8 +194,6 @@ get_codepoints(Interp *interpreter, STRI
UINTVAL start;
STRING *return_string = Parrot_make_COW_reference(interpreter,
src);
- return_string->encoding = src->encoding;
- return_string->charset = src->charset;
iter_init(interpreter, src, &iter);
iter.set_position(interpreter, &iter, offset);
start = iter.bytepos;
@@ -207,30 +205,29 @@ get_codepoints(Interp *interpreter, STRI
return return_string;
}
+
static STRING *
-get_bytes(Interp *interpreter, STRING *src,
- UINTVAL offset, UINTVAL count)
+get_codepoints_inplace(Interp *interpreter, STRING *src,
+ UINTVAL offset, UINTVAL count, STRING *return_string)
{
- STRING *return_string = Parrot_make_COW_reference(interpreter,
- src);
- return_string->encoding = src->encoding; /* XXX */
- return_string->charset = src->charset;
-
- return_string->strstart = (char *)return_string->strstart + offset ;
- return_string->bufused = count;
-
+ String_iter iter;
+ UINTVAL start;
+ Parrot_reuse_COW_reference(interpreter, src, return_string);
+ iter_init(interpreter, src, &iter);
+ iter.set_position(interpreter, &iter, offset);
+ start = iter.bytepos;
+ return_string->strstart = (char *)return_string->strstart + start ;
+ iter.set_position(interpreter, &iter, offset + count);
+ return_string->bufused = iter.bytepos - start;
return_string->strlen = count;
return_string->hashval = 0;
-
return return_string;
}
-
static STRING *
-get_codepoints_inplace(Interp *interpreter, STRING *src,
- UINTVAL offset, UINTVAL count, STRING *dest_string)
+get_bytes(Interp *interpreter, STRING *src,
+ UINTVAL offset, UINTVAL count)
{
-
UNIMPL;
return NULL;
}
Modified: trunk/encodings/utf8.c
==============================================================================
--- trunk/encodings/utf8.c (original)
+++ trunk/encodings/utf8.c Thu Nov 10 06:42:46 2005
@@ -415,8 +415,6 @@ get_codepoints(Interp *interpreter, STRI
UINTVAL start;
STRING *return_string = Parrot_make_COW_reference(interpreter,
src);
- return_string->encoding = src->encoding;
- return_string->charset = src->charset;
iter_init(interpreter, src, &iter);
iter.set_position(interpreter, &iter, offset);
start = iter.bytepos;
@@ -449,11 +447,20 @@ get_bytes(Interp *interpreter, STRING *s
static STRING *
get_codepoints_inplace(Interp *interpreter, STRING *src,
- UINTVAL offset, UINTVAL count, STRING *dest_string)
+ UINTVAL offset, UINTVAL count, STRING *return_string)
{
-
- UNIMPL;
- return NULL;
+ String_iter iter;
+ UINTVAL start;
+ Parrot_reuse_COW_reference(interpreter, src, return_string);
+ iter_init(interpreter, src, &iter);
+ iter.set_position(interpreter, &iter, offset);
+ start = iter.bytepos;
+ return_string->strstart = (char *)return_string->strstart + start ;
+ iter.set_position(interpreter, &iter, offset + count);
+ return_string->bufused = iter.bytepos - start;
+ return_string->strlen = count;
+ return_string->hashval = 0;
+ return return_string;
}
static STRING *