Author: leo
Date: Thu Nov  3 03:29:29 2005
New Revision: 9738

Modified:
   trunk/charset/unicode.c
   trunk/t/op/string_cclass.t
Log:
Implement unicode find_cclass, find_not_cclass

* simplify r9737
* tests


Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Thu Nov  3 03:29:29 2005
@@ -201,65 +201,71 @@ validate(Interp *interpreter, STRING *sr
     return 1;
 }
 
+/*
+ * is_foo
+ *
+ * Tests one codepoint against a single character class using ICU.
+ *
+ *   interpreter - current interpreter; not used directly in this body
+ *   codepoint   - the codepoint to classify
+ *   bit         - the enum_cclass_* value selecting the predicate
+ *
+ * Returns the value of the matching u_is*() call.  A bit with no
+ * case below runs UNIMPL; if control comes back from it, 0 is
+ * returned.
+ */
+static int
+is_foo(Interp *interpreter, UINTVAL codepoint, int bit)
+{
+    switch (bit) {
+        case enum_cclass_uppercase:
+            return u_isupper(codepoint);
+        case enum_cclass_lowercase:
+            return u_islower(codepoint);
+        case enum_cclass_alphabetic:
+            return u_isalpha(codepoint);
+        case enum_cclass_numeric:
+            /* XXX which one: u_isdigit() or u_charDigitValue()? */
+            return u_isdigit(codepoint);
+        case enum_cclass_hexadecimal:
+            return u_isxdigit(codepoint);
+        case enum_cclass_whitespace:
+            return u_isspace(codepoint);
+        case enum_cclass_printing:
+            return u_isprint(codepoint);
+        case enum_cclass_graphical:
+            return u_isgraph(codepoint);
+        case enum_cclass_blank:
+            return u_isblank(codepoint);
+        case enum_cclass_control:
+            return u_iscntrl(codepoint);
+        case enum_cclass_alphanumeric:
+            return u_isalnum(codepoint);
+        default:
+            UNIMPL;
+    }
+    return 0;
+}
+
 static INTVAL
 is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset)
 {
     UINTVAL codepoint;
-    int result, bit, mask;
+    int bit, mask;
 
     if (offset >= source_string->strlen)
         return 0;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
 #if PARROT_HAS_ICU
-    for (result = 0, mask = enum_cclass_uppercase;
+    for (mask = enum_cclass_uppercase;
             mask <= enum_cclass_word ; mask <<= 1) {
         bit = mask & flags;
-        switch (bit) {
-            case 0: continue;
-            case enum_cclass_uppercase:
-                    result |= u_isupper(codepoint);
-                    break;
-            case enum_cclass_lowercase:
-                    result |= u_islower(codepoint);
-                    break;
-            case enum_cclass_alphabetic:
-                    result |= u_isalpha(codepoint);
-                    break;
-            case enum_cclass_numeric:
-                    result |= u_isdigit(codepoint);
-                    /* XXX which one
-                       result |= u_charDigitValue(codepoint);
-                       */
-                    break;
-            case enum_cclass_hexadecimal:
-                    result |= u_isxdigit(codepoint);
-                    break;
-            case enum_cclass_whitespace:
-                    result |= u_isspace(codepoint);
-                    break;
-            case enum_cclass_printing:
-                    result |= u_isprint(codepoint);
-                    break;
-            case enum_cclass_graphical:
-                    result |= u_isgraph(codepoint);
-                    break;
-            case enum_cclass_blank:
-                    result |= u_isblank(codepoint);
-                    break;
-            case enum_cclass_control:
-                    result |= u_iscntrl(codepoint);
-                    break;
-            case enum_cclass_alphanumeric:
-                    result |= u_isalnum(codepoint);
-                    break;
-            default:
-                    UNIMPL;
-        }
-        /* more bits? */
-        if (~ (flags ^ ~mask) == 0)
-            break;
+        if (!bit)
+            continue;
+        if (is_foo(interpreter, codepoint, bit))
+            return 1;
     }
-    return result;
+    return 0;
 #else
     if (codepoint >= 128)
         real_exception(interpreter, NULL, E_LibraryNotLoadedError,
@@ -271,15 +277,67 @@ is_cclass(Interp *interpreter, PARROT_CC
 static INTVAL
 find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
 {
-    UNIMPL;
-    return -1;
+    /* Scan [offset, offset + count), clamped to the string length, and
+     * return the index of the first codepoint that is in any class of
+     * flags; return the clamped end index when there is none. */
+    UINTVAL limit = offset + count;
+    UINTVAL idx, cp;
+    int cls;
+
+    assert(source_string != 0);
+    if (source_string->strlen < limit)
+        limit = source_string->strlen;
+    for (idx = offset; idx < limit; idx++) {
+        cp = ENCODING_GET_CODEPOINT(interpreter, source_string, idx);
+#if PARROT_HAS_ICU
+        /* probe each single-bit class that was requested */
+        for (cls = enum_cclass_uppercase;
+                cls <= enum_cclass_word; cls <<= 1) {
+            if ((cls & flags) && is_foo(interpreter, cp, cls))
+                return idx;
+        }
+#else
+        if (cp >= 128)
+            real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+                    "no ICU lib loaded");
+        if (Parrot_ascii_typetable[cp] & flags)
+            return idx;
+#endif
+    }
+    return limit;
 }
 
 static INTVAL
 find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
 {
-    UNIMPL;
-    return -1;
+    /* Find the first codepoint in [offset, offset + count) that is in none
+     * of the classes in flags; return the clamped end if all of them match. */
+    UINTVAL pos = offset;
+    UINTVAL end = offset + count;
+    UINTVAL codepoint;
+    int mask, found;
+
+    assert(source_string != 0);
+    end = source_string->strlen < end ? source_string->strlen : end;
+    for (; pos < end; ++pos) {
+        codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+#if PARROT_HAS_ICU
+        /* a codepoint only counts as "not in class" if no bit matches */
+        for (found = 0, mask = enum_cclass_uppercase;
+                !found && mask <= enum_cclass_word; mask <<= 1)
+            found = (mask & flags) && is_foo(interpreter, codepoint, mask);
+        if (!found)
+            return pos;
+#else
+        if (codepoint >= 128)
+            real_exception(interpreter, NULL, E_LibraryNotLoadedError,
+                    "no ICU lib loaded");
+        /* no requested class bit in the table entry => not in class */
+        if ((Parrot_ascii_typetable[codepoint] & flags) == 0)
+            return pos;
+#endif
+    }
+    return end;
 }
 
 static STRING *

Modified: trunk/t/op/string_cclass.t
==============================================================================
--- trunk/t/op/string_cclass.t  (original)
+++ trunk/t/op/string_cclass.t  Thu Nov  3 03:29:29 2005
@@ -18,7 +18,7 @@ Tests find_cclass find_not_cclass, is_cc
 
 use strict;
 
-use Parrot::Test tests => 7;
+use Parrot::Test tests => 9;
 
 pir_output_is(<<'CODE', <<'OUT', "find_cclass, ascii");
 .include "cclass.pasm"
@@ -308,7 +308,7 @@ sub string {
 }
 
 my $all_ws = string('whitespace');
-pir_output_is(<<"CODE", <<'OUT', "unicode whitespace");
+pir_output_is(<<"CODE", <<'OUT', "unicode is_cclass whitespace");
 .sub main :main
 .include "cclass.pasm"
    .local int result, char, len, i
@@ -327,3 +327,40 @@ CODE
 11111111111111111111111111
 OUT
 
+# find_cclass: three non-whitespace characters are prepended, so the first
+# whitespace codepoint is expected at index 3.
+pir_output_is(<<"CODE", <<'OUT', "unicode find_cclass whitespace");
+.sub main :main
+.include "cclass.pasm"
+   .local int result, len
+   .local string s
+   s = $all_ws
+   s = unicode:"abc" . s
+   len = length s
+   result = find_cclass .CCLASS_WHITESPACE, s, 0, len
+   print result
+   print "\\n"
+.end
+CODE
+3
+OUT
+
+# find_not_cclass: "abc" is appended after the whitespace run, so the first
+# non-whitespace codepoint is expected at index 26 of a 29-character string.
+pir_output_is(<<"CODE", <<'OUT', "unicode find_not_cclass whitespace");
+.sub main :main
+.include "cclass.pasm"
+   .local int result, len
+   .local string s
+   s = $all_ws
+   s .= unicode:"abc"
+   len = length s
+   result = find_not_cclass .CCLASS_WHITESPACE, s, 0, len
+   print len
+   print ' '
+   print result
+   print "\\n"
+.end
+CODE
+29 26
+OUT

Reply via email to