Author: leo
Date: Fri Nov 11 08:07:45 2005
New Revision: 9904
Modified:
trunk/charset/unicode.c
trunk/encodings/utf8.c
Log:
unicode charset
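* remove the hand-written conversion loop in the unicode charset's to_charset and replace it by just calling utf8.to_encoding
* fix utf8.to_encoding (iso-8859-1 is definitely not byte-compatible with utf8 ;-), so only ascii may be copied verbatim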
Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c (original)
+++ trunk/charset/unicode.c Fri Nov 11 08:07:45 2005
@@ -61,39 +61,12 @@ static STRING*
to_charset(Interp *interpreter, STRING *src, STRING *dest)
{
charset_converter_t conversion_func;
- String_iter iter;
- UINTVAL c, len, offs;
if ((conversion_func = Parrot_find_charset_converter(interpreter,
src->charset, Parrot_unicode_charset_ptr))) {
return conversion_func(interpreter, src, dest);
}
- len = src->strlen;
- if (dest) {
- Parrot_reallocate_string(interpreter, dest, len);
- dest->charset = Parrot_unicode_charset_ptr;
- dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interpreter, dest);
- ENCODING_ITER_INIT(interpreter, dest, &iter);
- for (offs = 0; offs < src->strlen; ++offs) {
- c = ENCODING_GET_CODEPOINT(interpreter, src, offs);
- if (iter.bytepos >= PObj_buflen(dest) - 4) {
- UINTVAL need = (UINTVAL)( (src->strlen - offs) * 1.5 );
- if (need < 16)
- need = 16;
- Parrot_reallocate_string(interpreter, dest,
- PObj_buflen(dest) + need);
- }
- iter.set_and_advance(interpreter, &iter, c);
- }
- dest->bufused = iter.bytepos;
- dest->strlen = iter.charpos;
- return dest;
- }
- else {
- internal_exception(UNIMPLEMENTED,
- "to_charset inplace for unicode not implemented");
- }
- return NULL;
+ return Parrot_utf8_encoding_ptr->to_encoding(interpreter, src, dest);
}
Modified: trunk/encodings/utf8.c
==============================================================================
--- trunk/encodings/utf8.c (original)
+++ trunk/encodings/utf8.c Fri Nov 11 08:07:45 2005
@@ -326,6 +326,8 @@ to_encoding(Interp *interpreter, STRING
result = dest;
}
+ /* init iter before possibly changing encoding */
+ ENCODING_ITER_INIT(interpreter, src, &src_iter);
result->charset = Parrot_unicode_charset_ptr;
result->encoding = Parrot_utf8_encoding_ptr;
result->strlen = src_len;
@@ -341,15 +343,13 @@ to_encoding(Interp *interpreter, STRING
Parrot_reallocate_string(interpreter, dest, src_len);
p = dest->strstart;
}
- if (src->charset == Parrot_iso_8859_1_charset_ptr ||
- src->charset == Parrot_ascii_charset_ptr) {
+ if (src->charset == Parrot_ascii_charset_ptr) {
for (dest_len = 0; dest_len < src_len; ++dest_len) {
p[dest_len] = ((unsigned char*)src->strstart)[dest_len];
}
result->bufused = dest_len;
}
else {
- ENCODING_ITER_INIT(interpreter, src, &src_iter);
dest_len = src_len;
dest_pos = 0;
for (offs = 0; offs < src_len; ++offs) {