Author: leo
Date: Fri Nov 11 06:55:00 2005
New Revision: 9902
Modified:
trunk/charset/unicode.c
trunk/t/op/string_cs.t
Log:
unicode charsets - upcase, titlecase
* c&p job of the downcase code
* 2 simple tests
Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c (original)
+++ trunk/charset/unicode.c Fri Nov 11 06:55:00 2005
@@ -110,9 +110,43 @@ decompose(Interp *interpreter, STRING *s
}
static void
-upcase(Interp *interpreter, STRING *source_string)
+upcase(Interp *interpreter, STRING *src)
{
- UNIMPL;
+#if PARROT_HAS_ICU
+
+ UErrorCode err;
+ int dest_len, src_len;
+
+ src = Parrot_utf16_encoding_ptr->to_encoding(interpreter, src, NULL);
+ /*
+U_CAPI int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+ */
+ err = U_ZERO_ERROR;
+ src_len = src->bufused / sizeof(UChar);
+ dest_len = u_strToUpper(src->strstart, src_len,
+ src->strstart, src_len,
+ NULL, /* locale = default */
+ &err);
+ src->bufused = dest_len * sizeof(UChar);
+ if (!U_SUCCESS(err)) {
+ Parrot_reallocate_string(interpreter, src, src->bufused);
+ dest_len = u_strToUpper(src->strstart, dest_len,
+ src->strstart, src_len,
+ NULL, /* locale = default */
+ &err);
+ assert(U_SUCCESS(err));
+ }
+ /* downgrade if possible */
+ if (dest_len == (int)src->strlen)
+ src->encoding = Parrot_ucs2_encoding_ptr;
+#else
+ real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+ "no ICU lib loaded");
+#endif
}
static void
@@ -156,9 +190,45 @@ u_strToLower(UChar *dest, int32_t destCa
}
static void
-titlecase(Interp *interpreter, STRING *source_string)
+titlecase(Interp *interpreter, STRING *src)
{
- UNIMPL;
+#if PARROT_HAS_ICU
+
+ UErrorCode err;
+ int dest_len, src_len;
+
+ src = Parrot_utf16_encoding_ptr->to_encoding(interpreter, src, NULL);
+ /*
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+ */
+ err = U_ZERO_ERROR;
+ src_len = src->bufused / sizeof(UChar);
+ dest_len = u_strToTitle(src->strstart, src_len,
+ src->strstart, src_len,
+ NULL, /* default titleiter */
+ NULL, /* locale = default */
+ &err);
+ src->bufused = dest_len * sizeof(UChar);
+ if (!U_SUCCESS(err)) {
+ Parrot_reallocate_string(interpreter, src, src->bufused);
+ dest_len = u_strToTitle(src->strstart, dest_len,
+ src->strstart, src_len,
+ NULL, NULL,
+ &err);
+ assert(U_SUCCESS(err));
+ }
+ /* downgrade if possible */
+ if (dest_len == (int)src->strlen)
+ src->encoding = Parrot_ucs2_encoding_ptr;
+#else
+ real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+ "no ICU lib loaded");
+#endif
}
static void
Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t (original)
+++ trunk/t/op/string_cs.t Fri Nov 11 06:55:00 2005
@@ -16,7 +16,7 @@ Tests charset support.
=cut
-use Parrot::Test tests => 41;
+use Parrot::Test tests => 43;
use Parrot::Config;
use Test::More;
@@ -507,7 +507,7 @@ abcdefg
OUTPUT
SKIP: {
- skip('no ICU lib', 10) unless $PConfig{has_icu};
+ skip('no ICU lib', 12) unless $PConfig{has_icu};
output_is( <<'CODE', <<"OUTPUT", "unicode downcase");
 set S0, iso-8859-1:"TÖTSCH"
find_charset I0, "unicode"
@@ -691,4 +691,32 @@ CODE
6
OUTPUT
+# Upcase a string that was transcoded from ISO-8859-1 to the unicode charset;
+# the non-ASCII letter o-umlaut must be mapped too. The expected output is
+# UTF-8, where "\xc3\x96" is the upper-case O-umlaut.
+output_is( <<'CODE', <<"OUTPUT", "unicode upcase");
+    set S0, iso-8859-1:"tötsch"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    upcase S1
+    getstdout P0          # need to convert back to utf8
+    push P0, "utf8"       # push utf8 output layer
+    print S1
+    print "\n"
+    end
+CODE
+T\xc3\x96TSCH
+OUTPUT
+
+# Titlecase a two-word string that was transcoded from ISO-8859-1 to the
+# unicode charset: each word gets an upper-case initial and the o-umlaut
+# stays lower case. The expected output is UTF-8, where "\xc3\xb6" is the
+# lower-case o-umlaut.
+output_is( <<'CODE', <<"OUTPUT", "unicode titlecase");
+    set S0, iso-8859-1:"tötsch leo"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    titlecase S1
+    getstdout P0          # need to convert back to utf8
+    push P0, "utf8"       # push utf8 output layer
+    print S1
+    print "\n"
+    end
+CODE
+T\xc3\xb6tsch Leo
+OUTPUT
+
} # SKIP