Author: leo
Date: Wed Nov 16 06:39:41 2005
New Revision: 10022

Modified:
   trunk/charset/unicode.c
Log:
initial workaround for string_cs_45

Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Wed Nov 16 06:39:41 2005
@@ -88,47 +88,52 @@ upcase(Interp *interpreter, STRING *src)
 #if PARROT_HAS_ICU
 
     UErrorCode err;
-    int dest_len, src_len;
+    int dest_len, src_len, needed;
 
     src = Parrot_utf16_encoding_ptr->to_encoding(interpreter, src, NULL);
     /*
-U_CAPI int32_t U_EXPORT2
-u_strToUpper(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
+       U_CAPI int32_t U_EXPORT2
+       u_strToUpper(UChar *dest, int32_t destCapacity,
+       const UChar *src, int32_t srcLength,
+       const char *locale,
+       UErrorCode *pErrorCode);
+       */
     err = U_ZERO_ERROR;
     /* use all available space - see below XXX */
     /* TODO downcase, titlecase too */
     dest_len = PObj_buflen(src) / sizeof(UChar);
     src_len = src->bufused / sizeof(UChar);
+    /*
+     * XXX troubles:
+     *   t/op/string_cs_45  upcase unicode:"\u01f0"
+     *   this creates \u004a \u030c J+NON-SPACING HACEK
+     *   the string needs resizing, *if* the src buffer is
+     *   too short. *But* with ICU 3.2/3.4 the src string is
+     *   overwritten with a partial result, even though the ICU docs say:
+     *
+     *      The source string and the destination buffer
+     *      are allowed to overlap.
+     *
+     *  Workaround:  'preflighting' returns needed length
+     *  Alternative: forget about inplace operation - create new result
+     *
+     *  TODO downcase, titlecase
+     */
+    needed = u_strToUpper(NULL, 0,
+            src->strstart, src_len,
+            NULL,       /* locale = default */
+            &err);
+    if (needed > dest_len) {
+        Parrot_reallocate_string(interpreter, src, needed * sizeof(UChar));
+        dest_len = needed;
+    }
+    err = U_ZERO_ERROR;
     dest_len = u_strToUpper(src->strstart, dest_len,
             src->strstart, src_len,
             NULL,       /* locale = default */
             &err);
+    assert(U_SUCCESS(err));
     src->bufused = dest_len * sizeof(UChar);
-    if (!U_SUCCESS(err)) {
-        /*
-         * XXX troubles:
-         *   t/op/string_cs_44  upcase unicode:"\u01f0"
-         *   this creates \u004a \u030c J+NON-SPACING HACEK
-         *   the string needs resizing, *if* the src buffer is
-         *   too short. *But* (at least) with icu 3.2 the src string is
-         *   overwritten with \0 despite the icu docs sayeth:
-         *
-         *      The source string and the destination buffer
-         *      are allowed to overlap.
-         */
-        err = U_ZERO_ERROR;
-        Parrot_reallocate_string(interpreter, src, dest_len);
-        dest_len = u_strToUpper(src->strstart, dest_len,
-                src->strstart, src_len,
-                NULL,       /* locale = default */
-                &err);
-        assert(U_SUCCESS(err));
-        src->bufused = dest_len * sizeof(UChar);
-    }
     /* downgrade if possible */
     if (dest_len == (int)src->strlen)
         src->encoding = Parrot_ucs2_encoding_ptr;

Reply via email to