Author: leo
Date: Thu Nov 10 02:25:50 2005
New Revision: 9876
Modified:
trunk/charset/unicode.c
trunk/src/string.c
trunk/t/op/string_cs.t
Log:
unicode improvements - chopn
* fix chopn: bufused was computed incorrectly
* chopn is still broken for immutable (constant or external) strings
* test
Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c (original)
+++ trunk/charset/unicode.c Thu Nov 10 02:25:50 2005
@@ -151,6 +151,9 @@ u_strToLower(UChar *dest, int32_t destCa
&err);
assert(U_SUCCESS(err));
}
+ /* downgrade if possible */
+ if (dest_len == (int)src->strlen)
+ src->encoding = Parrot_ucs2_encoding_ptr;
#else
real_exception(interpreter, NULL, E_LibraryNotLoadedError,
"no ICU lib loaded");
Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c (original)
+++ trunk/src/string.c Thu Nov 10 02:25:50 2005
@@ -1281,7 +1281,13 @@ C<n> is negative, cuts the string after
STRING *
string_chopn(Interp *interpreter, STRING *s, INTVAL n)
{
- UINTVAL new_length;
+ UINTVAL new_length, uchar_size;
+ String_iter iter;
+ /*
+ * FIXME: constant or external strings can't be chopped in place
+ */
+ if (!s)
+ return NULL;
if (n < 0) {
new_length = -n;
@@ -1294,10 +1300,24 @@ string_chopn(Interp *interpreter, STRING
else
new_length = 0;
}
-
- s->strlen = new_length;
- s->bufused = string_max_bytes(interpreter, s, new_length);
s->hashval = 0;
+ if (!new_length || !s->strlen) {
+ s->bufused = s->strlen = 0;
+ return s;
+ }
+ uchar_size = s->bufused / s->strlen;
+ s->strlen = new_length;
+ if (s->encoding == Parrot_fixed_8_encoding_ptr) {
+ s->bufused = new_length;
+ }
+ else if (s->encoding == Parrot_ucs2_encoding_ptr) {
+ s->bufused = new_length * uchar_size;
+ }
+ else {
+ ENCODING_ITER_INIT(interpreter, s, &iter);
+ iter.set_position(interpreter, &iter, new_length);
+ s->bufused = iter.bytepos;
+ }
return s;
}
Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t (original)
+++ trunk/t/op/string_cs.t Thu Nov 10 02:25:50 2005
@@ -16,7 +16,7 @@ Tests charset support.
=cut
-use Parrot::Test tests => 34;
+use Parrot::Test tests => 35;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -556,3 +556,18 @@ CODE
84_214_84_83_67_72_
OUTPUT
+output_is( <<'CODE', <<"OUTPUT", "chopn utf8");
+ set S0, iso-8859-1:"TTÖÖ"
+ find_charset I0, "unicode"
+ trans_charset S1, S0, I0
+ chopn S1, 2
+ print_item S1
+ length I0, S1
+ print_item I0
+ .include "stringinfo.pasm"
+ stringinfo I0, S1, .STRINGINFO_BUFUSED
+ print_item I0
+ print_newline
+CODE
+TT 2 2
+OUTPUT