Author: leo
Date: Thu Nov  3 11:43:59 2005
New Revision: 9757

Modified:
   trunk/charset/unicode.c
   trunk/t/op/string_cclass.t
Log:
Use the ISO-8859-1 cclass table for codepoints < 256; fix find_not_cclass;
more verbose errors in the test.

Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Thu Nov  3 11:43:59 2005
@@ -264,22 +264,23 @@ is_cclass(Interp *interpreter, PARROT_CC
     if (offset >= source_string->strlen)
         return 0;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
+    if (codepoint >= 256) {
 #if PARROT_HAS_ICU
-    for (mask = enum_cclass_uppercase;
-            mask <= enum_cclass_word ; mask <<= 1) {
-        bit = mask & flags;
-        if (!bit)
-            continue;
-        if (is_foo(interpreter, codepoint, bit))
-            return 1;
-    }
-    return 0;
+        for (mask = enum_cclass_uppercase;
+                mask <= enum_cclass_word ; mask <<= 1) {
+            bit = mask & flags;
+            if (!bit)
+                continue;
+            if (is_foo(interpreter, codepoint, bit))
+                return 1;
+        }
+        return 0;
 #else
-    if (codepoint >= 256)
         real_exception(interpreter, NULL, E_LibraryNotLoadedError,
                 "no ICU lib loaded");
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
 #endif
+    }
+    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
 }
 
 static INTVAL
@@ -296,23 +297,26 @@ find_cclass(Interp *interpreter, PARROT_
     end = source_string->strlen < end ? source_string->strlen : end;
     for (; pos < end; ++pos) {
         codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+        if (codepoint >= 256) {
 #if PARROT_HAS_ICU
-        for (mask = enum_cclass_uppercase;
-                mask <= enum_cclass_word ; mask <<= 1) {
-            bit = mask & flags;
-            if (!bit)
-                continue;
-            if (is_foo(interpreter, codepoint, bit))
-                return pos;
-        }
+            for (mask = enum_cclass_uppercase;
+                    mask <= enum_cclass_word ; mask <<= 1) {
+                bit = mask & flags;
+                if (!bit)
+                    continue;
+                if (is_foo(interpreter, codepoint, bit))
+                    return pos;
+            }
 #else
-        if (codepoint >= 256)
             real_exception(interpreter, NULL, E_LibraryNotLoadedError,
                     "no ICU lib loaded");
-        if ((Parrot_iso_8859_1_typetable[codepoint] & flags) != 0) {
-            return pos;
-        }
 #endif
+        }
+        else {
+            if (Parrot_iso_8859_1_typetable[codepoint] & flags) {
+                return pos;
+            }
+        }
     }
     return end;
 }
@@ -330,24 +334,27 @@ find_not_cclass(Interp *interpreter, PAR
     assert(source_string != 0);
     end = source_string->strlen < end ? source_string->strlen : end;
     for (; pos < end; ++pos) {
-       codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+        codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+        if (codepoint >= 256) {
 #if PARROT_HAS_ICU
-        for (mask = enum_cclass_uppercase;
-                mask <= enum_cclass_word ; mask <<= 1) {
-            bit = mask & flags;
-            if (!bit)
-                continue;
-            if (!is_foo(interpreter, codepoint, bit))
-                return pos;
-        }
+            for (mask = enum_cclass_uppercase;
+                    mask <= enum_cclass_word ; mask <<= 1) {
+                bit = mask & flags;
+                if (!bit)
+                    continue;
+                if (!is_foo(interpreter, codepoint, bit))
+                    return pos;
+            }
 #else
-        if (codepoint >= 256)
             real_exception(interpreter, NULL, E_LibraryNotLoadedError,
                     "no ICU lib loaded");
-        if ((Parrot_iso_8859_1_typetable[codepoint] & flags) != 0) {
-            return pos;
-        }
 #endif
+        } 
+        else {
+            if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) {
+                return pos;
+            }
+        }
     }
     return end;
 }

Modified: trunk/t/op/string_cclass.t
==============================================================================
--- trunk/t/op/string_cclass.t  (original)
+++ trunk/t/op/string_cclass.t  Thu Nov  3 11:43:59 2005
@@ -324,6 +324,14 @@ pir_output_is(<<"CODE", <<'OUT', "unicod
 loop:
    result = is_cclass .CCLASS_WHITESPACE, s, i
    print result
+   if result goto ok
+   \$S0 = s[i]
+   \$I0 = ord \$S0
+   \$P0 = new .ResizablePMCArray
+   push \$P0, \$I0
+   \$S0 = sprintf "\\nchar %#x not reported as ws\\n", \$P0
+   print \$S0
+ok:
    inc i
    if i < len goto loop
    print "\\n"

Reply via email to