Author: leo
Date: Fri Nov 11 06:55:00 2005
New Revision: 9902

Modified:
   trunk/charset/unicode.c
   trunk/t/op/string_cs.t
Log:
unicode charsets - upcase, titlecase

* copy-and-paste job of the downcase code
* 2 simple tests


Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Fri Nov 11 06:55:00 2005
@@ -110,9 +110,43 @@ decompose(Interp *interpreter, STRING *s
 }
 
 static void
-upcase(Interp *interpreter, STRING *source_string)
+upcase(Interp *interpreter, STRING *src)
 {
-    UNIMPL;
+#if PARROT_HAS_ICU
+
+    UErrorCode err;
+    int dest_len, src_len;
+
+    src = Parrot_utf16_encoding_ptr->to_encoding(interpreter, src, NULL);
+    /*
+U_CAPI int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+     */
+    err = U_ZERO_ERROR;
+    src_len = src->bufused / sizeof(UChar);
+    dest_len = u_strToUpper(src->strstart, src_len,
+            src->strstart, src_len,
+            NULL,       /* locale = default */
+            &err);
+    src->bufused = dest_len * sizeof(UChar);
+    if (!U_SUCCESS(err)) {
+        Parrot_reallocate_string(interpreter, src, src->bufused);
+        dest_len = u_strToUpper(src->strstart, dest_len,
+                src->strstart, src_len,
+                NULL,       /* locale = default */
+                &err);
+        assert(U_SUCCESS(err));
+    }
+    /* downgrade if possible */
+    if (dest_len == (int)src->strlen)
+        src->encoding = Parrot_ucs2_encoding_ptr;
+#else
+    real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+            "no ICU lib loaded");
+#endif
 }
 
 static void
@@ -156,9 +190,45 @@ u_strToLower(UChar *dest, int32_t destCa
 }
 
 static void
-titlecase(Interp *interpreter, STRING *source_string)
+titlecase(Interp *interpreter, STRING *src)
 {
-    UNIMPL;
+#if PARROT_HAS_ICU
+
+    UErrorCode err;
+    int dest_len, src_len;
+
+    src = Parrot_utf16_encoding_ptr->to_encoding(interpreter, src, NULL);
+    /*
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode);
+     */
+    err = U_ZERO_ERROR;
+    src_len = src->bufused / sizeof(UChar);
+    dest_len = u_strToTitle(src->strstart, src_len,
+            src->strstart, src_len,
+            NULL,       /* default titleiter */
+            NULL,       /* locale = default */
+            &err);
+    src->bufused = dest_len * sizeof(UChar);
+    if (!U_SUCCESS(err)) {
+        Parrot_reallocate_string(interpreter, src, src->bufused);
+        dest_len = u_strToTitle(src->strstart, dest_len,
+                src->strstart, src_len,
+                NULL, NULL,
+                &err);
+        assert(U_SUCCESS(err));
+    }
+    /* downgrade if possible */
+    if (dest_len == (int)src->strlen)
+        src->encoding = Parrot_ucs2_encoding_ptr;
+#else
+    real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+            "no ICU lib loaded");
+#endif
 }
 
 static void

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Fri Nov 11 06:55:00 2005
@@ -16,7 +16,7 @@ Tests charset support.
 
 =cut
 
-use Parrot::Test tests => 41;
+use Parrot::Test tests => 43;
 use Parrot::Config;
 use Test::More;
 
@@ -507,7 +507,7 @@ abcdefg
 OUTPUT
 
 SKIP: {
-  skip('no ICU lib', 10) unless $PConfig{has_icu};
+  skip('no ICU lib', 12) unless $PConfig{has_icu};
 output_is( <<'CODE', <<"OUTPUT", "unicode downcase");
     set S0, iso-8859-1:"TÖTSCH"
     find_charset I0, "unicode"
@@ -691,4 +691,32 @@ CODE
 6
 OUTPUT
 
+output_is( <<'CODE', <<"OUTPUT", "unicode upcase");
+    set S0, iso-8859-1:"tötsch"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    upcase S1
+    getstdout P0          # need to convert back to utf8
+    push P0, "utf8"       # push utf8 output layer
+    print S1
+    print "\n"
+    end
+CODE
+T\xc3\x96TSCH
+OUTPUT
+
+output_is( <<'CODE', <<"OUTPUT", "unicode titlecase");
+    set S0, iso-8859-1:"tötsch leo"
+    find_charset I0, "unicode"
+    trans_charset S1, S0, I0
+    titlecase S1
+    getstdout P0          # need to convert back to utf8
+    push P0, "utf8"       # push utf8 output layer
+    print S1
+    print "\n"
+    end
+CODE
+T\xc3\xb6tsch Leo
+OUTPUT
+
 }  # SKIP

Reply via email to