Author: jrieks
Date: Sun May  8 14:24:53 2005
New Revision: 8014

Added:
   trunk/charset/tables.c
   trunk/charset/tables.h
   trunk/include/parrot/cclass.h
   trunk/t/op/string_cclass.t
Modified:
   trunk/MANIFEST
   trunk/charset/ascii.c
   trunk/charset/ascii.h
   trunk/charset/binary.c
   trunk/charset/iso-8859-1.c
   trunk/charset/unicode.c
   trunk/config/gen/parrot_include.pl
   trunk/include/parrot/charset.h
   trunk/include/parrot/string_funcs.h
   trunk/ops/experimental.ops
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
character classification, part 1:
- added include/parrot/cclass.h
- added PARROT_CCLASS_FLAGS enum
- added is_cclass (working), find_cclass (noop), find_not_cclass (noop) to 
experimental.ops
- removed old charset tables
- added new, automatically generated character classification tables
- is_punctuation now treats '_' as a punctuation character (modified a test)
- find_wordchar also treats '_' as a word character (modified a test)
- modified a test to also test access beyond string end


Modified: trunk/MANIFEST
==============================================================================
--- trunk/MANIFEST      (original)
+++ trunk/MANIFEST      Sun May  8 14:24:53 2005
@@ -44,8 +44,11 @@
 charset/ascii.h                                          []
 charset/binary.c                                 []
 charset/binary.h                                 []
+charset/gen_tables.pl                            [devel]
 charset/iso-8859-1.c                             []
 charset/iso-8859-1.h                             []
+charset/tables.c                                 []
+charset/tables.h                                 []
 charset/unicode.c                                []
 charset/unicode.h                                []
 classes/array.pmc                                 []
@@ -708,6 +711,7 @@
 include/parrot/autoprefix.h                      [devel]include
 include/parrot/builtin.h                         [devel]include
 include/parrot/caches.h                           [devel]include
+include/parrot/cclass.h                           [devel]include
 include/parrot/charset.h                         [devel]include
 include/parrot/datatypes.h                        [devel]include
 include/parrot/debug.h                            [devel]include
@@ -1716,6 +1720,7 @@
 t/op/spawnw.t                                     []
 t/op/stacks.t                                     []
 t/op/string.t                                     []
+t/op/string_cclass.t                             []
 t/op/string_cs.t                                  []
 t/op/stringu.t                                    []
 t/op/time.t                                       []

Modified: trunk/charset/ascii.c
==============================================================================
--- trunk/charset/ascii.c       (original)
+++ trunk/charset/ascii.c       Sun May  8 14:24:53 2005
@@ -30,33 +30,11 @@
 #define EXCEPTION(err, str) \
     real_exception(interpreter, NULL, err, str)
 
-#define WHITESPACE 1
-#define WORDCHAR 2
-#define PUNCTUATION 4
-#define DIGIT 8
-
-static const unsigned char typetable[256] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0-15 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
-    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 32-47 */
-    0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 4, 4, 4, 4, 4, 4, /*48.*/
-    4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, /* 80-95 */
-    4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 95-111 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 0, /* 112-127 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128-143 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144-159 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160-175 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176-191 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192-207 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 208-223 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 224-239 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 240-255 */
-};
+#include "tables.h"
 
 INTVAL
 ascii_find_thing(Interp *interpreter, STRING *string, UINTVAL start,
-        unsigned char type, const unsigned char *table)
+        PARROT_CCLASS_FLAGS type, const PARROT_CCLASS_FLAGS *table)
 {
     for (; start < string->strlen; start++) {
         if (table[ENCODING_GET_BYTE(interpreter, string, start)] & type) {
@@ -68,7 +46,7 @@
 
 INTVAL
 ascii_find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
-        unsigned char type, const unsigned char *table)
+        PARROT_CCLASS_FLAGS type, const PARROT_CCLASS_FLAGS *table)
 {
     for (; start < string->strlen; start++) {
         if (!(table[ENCODING_GET_BYTE(interpreter, string, start)] & type)) {
@@ -413,21 +391,21 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] & WORDCHAR) ? 1 : 0;
+    return (Parrot_ascii_typetable[codepoint] & WORDCHAR) ? 1 : 0;
 }
 
 static INTVAL
 find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, WORDCHAR,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
 find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset, WORDCHAR,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
@@ -435,14 +413,14 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] == WHITESPACE);
+    return (Parrot_ascii_typetable[codepoint] & WHITESPACE) == WHITESPACE;
 }
 
 static INTVAL
 find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, WHITESPACE,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
@@ -450,7 +428,7 @@
         UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset,
-            WHITESPACE, typetable);
+            WHITESPACE, Parrot_ascii_typetable);
 }
 
 static INTVAL
@@ -458,21 +436,21 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] & DIGIT) ? 1 : 0;
+    return (Parrot_ascii_typetable[codepoint] & DIGIT) == DIGIT;
 }
 
 static INTVAL
 find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, DIGIT,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
 find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset, DIGIT,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
@@ -480,14 +458,14 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] == PUNCTUATION);
+    return (Parrot_ascii_typetable[codepoint] & PUNCTUATION) == PUNCTUATION;
 }
 
 static INTVAL
 find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, PUNCTUATION,
-            typetable);
+            Parrot_ascii_typetable);
 }
 
 static INTVAL
@@ -495,7 +473,7 @@
         UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset,
-            PUNCTUATION, typetable);
+            PUNCTUATION, Parrot_ascii_typetable);
 }
 
 INTVAL
@@ -531,7 +509,7 @@
 
 INTVAL
 ascii_find_word_boundary(Interp *interpreter, STRING *string,
-        UINTVAL offset, const unsigned char *table)
+        UINTVAL offset, const PARROT_CCLASS_FLAGS *table)
 {
     UINTVAL c, len;
     int is_wc1, is_wc2;
@@ -561,7 +539,7 @@
 find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
   return ascii_find_word_boundary(interpreter, source_string,
-          offset, typetable);
+          offset, Parrot_ascii_typetable);
 }
 
 static STRING *
@@ -573,6 +551,32 @@
     return return_string;
 }
 
+static INTVAL
+is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset)
+{
+    UINTVAL codepoint;
+    codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
+
+    if (codepoint >= sizeof(Parrot_ascii_typetable) / 
sizeof(Parrot_ascii_typetable[0])) {
+        return 0;
+    }
+    return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
+}
+
+static INTVAL
+find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    real_exception(interpreter, NULL, UNIMPLEMENTED, "unimplemented 
ascii:find_cclass");
+    return -1;
+}
+
+static INTVAL
+find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    real_exception(interpreter, NULL, UNIMPLEMENTED, "unimplemented 
ascii:find_not_cclass");
+    return -1;
+}
+
 /*
  * TODO pass in the Hash's seed value as initial hashval
  */
@@ -617,6 +621,9 @@
         ascii_cs_index,
         ascii_cs_rindex,
         validate,
+        is_cclass,
+        find_cclass,
+        find_not_cclass,
         is_wordchar,
         find_wordchar,
         find_not_wordchar,

Modified: trunk/charset/ascii.h
==============================================================================
--- trunk/charset/ascii.h       (original)
+++ trunk/charset/ascii.h       Sun May  8 14:24:53 2005
@@ -19,10 +19,10 @@
 
 INTVAL
 ascii_find_thing(Interp *interpreter, STRING *string, UINTVAL start,
-        unsigned char type, const unsigned char *table);
+        PARROT_CCLASS_FLAGS type, const PARROT_CCLASS_FLAGS *table);
 INTVAL
 ascii_find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
-        unsigned char type, const unsigned char *table);
+        PARROT_CCLASS_FLAGS type, const PARROT_CCLASS_FLAGS *table);
 STRING *ascii_get_graphemes(Interp *, STRING *source_string,
         UINTVAL offset, UINTVAL count);
 STRING *ascii_get_graphemes_inplace(Interp *, STRING *source_string,
@@ -31,7 +31,7 @@
 INTVAL ascii_find_newline(Interp *, STRING *source_string, UINTVAL offset);
 INTVAL ascii_find_not_newline(Interp *, STRING *source_string, UINTVAL offset);
 INTVAL ascii_find_word_boundary(Interp *, STRING *source_string,
-        UINTVAL offset, const unsigned char *typetable);
+        UINTVAL offset, const PARROT_CCLASS_FLAGS *typetable);
 INTVAL ascii_compare(Interp *, STRING *lhs, STRING *rhs);
 INTVAL ascii_compare(Interp *, STRING *lhs, STRING *rhs);
 INTVAL ascii_cs_index(Interp *, STRING *source_string,

Modified: trunk/charset/binary.c
==============================================================================
--- trunk/charset/binary.c      (original)
+++ trunk/charset/binary.c      Sun May  8 14:24:53 2005
@@ -233,6 +233,24 @@
     return -1;
 }
 
+static INTVAL
+is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset)
+{
+    return 0;
+}
+
+static INTVAL
+find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    return -1;
+}
+
+static INTVAL
+find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    return -1;
+}
+
 static STRING *
 string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
 {
@@ -268,6 +286,9 @@
         cs_index,
         cs_rindex,
         validate,
+        is_cclass,
+        find_cclass,
+        find_not_cclass,
         is_wordchar,
         find_wordchar,
         find_not_wordchar,

Modified: trunk/charset/iso-8859-1.c
==============================================================================
--- trunk/charset/iso-8859-1.c  (original)
+++ trunk/charset/iso-8859-1.c  Sun May  8 14:24:53 2005
@@ -29,31 +29,7 @@
 #define EXCEPTION(err, str) \
     real_exception(interpreter, NULL, err, str)
 
-#define WHITESPACE 1
-#define WORDCHAR 2
-#define PUNCTUATION 4
-#define DIGIT 8
-
-static const unsigned char typetable[256] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0-15 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
-    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 32-47 */
-    0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 4, 4, 4, 4, 4, 4, /* 48 
*/
-    4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, /* 80-95 */
-    4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 95-111 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 0, /* 112-127 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128-143 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144-159 */
-    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 160-175 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, /* 176-191 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 192-207 */
-    2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, /* 208-223 */
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224-239 */
-    2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, /* 240-255 */
-};
-
-
+#include "tables.h"
 
 static void
 set_graphemes(Interp *interpreter, STRING *source_string,
@@ -244,21 +220,21 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] & WORDCHAR) ? 1 : 0;
+    return (Parrot_iso_8859_1_typetable[codepoint] & WORDCHAR) == WORDCHAR;
 }
 
 static INTVAL
 find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, WORDCHAR,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
 find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset, WORDCHAR,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -266,14 +242,14 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] == WHITESPACE);
+    return (Parrot_iso_8859_1_typetable[codepoint] & WHITESPACE) == WHITESPACE;
 }
 
 static INTVAL
 find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, WHITESPACE,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -281,7 +257,7 @@
         UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset,
-            WHITESPACE, typetable);
+            WHITESPACE, Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -289,21 +265,21 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] & DIGIT) ? 1 : 0;
+    return (Parrot_iso_8859_1_typetable[codepoint] & DIGIT) == DIGIT;
 }
 
 static INTVAL
 find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, DIGIT,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
 find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset, DIGIT,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -311,14 +287,14 @@
 {
     UINTVAL codepoint;
     codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
-    return (typetable[codepoint] == PUNCTUATION);
+    return (Parrot_iso_8859_1_typetable[codepoint] & PUNCTUATION) == 
PUNCTUATION;
 }
 
 static INTVAL
 find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
     return ascii_find_thing(interpreter, source_string, offset, PUNCTUATION,
-            typetable);
+            Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -326,7 +302,7 @@
         UINTVAL offset)
 {
     return ascii_find_not_thing(interpreter, source_string, offset,
-            PUNCTUATION, typetable);
+            PUNCTUATION, Parrot_iso_8859_1_typetable);
 }
 
 static INTVAL
@@ -339,9 +315,34 @@
 find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset)
 {
   return ascii_find_word_boundary(interpreter, source_string,
-          offset, typetable);
+          offset, Parrot_iso_8859_1_typetable);
+}
+
+static INTVAL
+is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset)
+{
+    UINTVAL codepoint;
+    codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
+    /* Codepoints past the end of the table match no class; bound the lookup by the table indexed below. */
+    if (codepoint >= sizeof(Parrot_iso_8859_1_typetable) / 
sizeof(Parrot_iso_8859_1_typetable[0])) {
+        return 0;
+    }
+    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
 }
 
+static INTVAL
+find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    return -1;
+}
+
+static INTVAL
+find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    return -1;
+}
+
+
 static STRING *
 string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
 {
@@ -377,6 +378,9 @@
         ascii_cs_index,
         ascii_cs_rindex,
         validate,
+       is_cclass,
+       find_cclass,
+       find_not_cclass,
         is_wordchar,
         find_wordchar,
         find_not_wordchar,

Added: trunk/charset/tables.c
==============================================================================
--- (empty file)
+++ trunk/charset/tables.c      Sun May  8 14:24:53 2005
@@ -0,0 +1,84 @@
+/* $id $
+ *  Copyright: 2005 The Perl Foundation.  All Rights Reserved.
+ *
+ * DO NOT EDIT THIS FILE DIRECTLY!
+ * please update the charset/gen_tables.pl script instead.
+ *
+ * Created by gen_tables.pl jrieks 
+ *  Overview:
+ *     This file contains various charset tables.
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+
+#include "tables.h"
+const PARROT_CCLASS_FLAGS Parrot_ascii_typetable[256] = {
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
+0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
+0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
+0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
+0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
+0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
+0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
+0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
+0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 128-135 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 160-167 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 168-175 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 176-183 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 184-191 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 192-199 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 200-207 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 208-215 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 216-223 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 224-231 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 232-239 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 240-247 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 248-255 */
+};
+const PARROT_CCLASS_FLAGS Parrot_iso_8859_1_typetable[256] = {
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
+0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
+0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
+0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
+0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
+0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
+0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
+0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
+0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 128-135 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 136-143 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 144-151 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 152-159 */
+0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 160-167 */
+0x04c0, 0x04c0, 0x28c4, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 168-175 */
+0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x28c6, 0x04c0, 0x04c0, /* 176-183 */
+0x04c0, 0x04c0, 0x28c4, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 184-191 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 192-199 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 200-207 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x04c0, /* 208-215 */
+0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c6, /* 216-223 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 224-231 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 232-239 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x04c0, /* 240-247 */
+0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 248-255 */
+};

Added: trunk/charset/tables.h
==============================================================================
--- (empty file)
+++ trunk/charset/tables.h      Sun May  8 14:24:53 2005
@@ -0,0 +1,25 @@
+/* $id $
+ *  Copyright: 2005 The Perl Foundation.  All Rights Reserved.
+ *
+ * DO NOT EDIT THIS FILE DIRECTLY!
+ * please update the charset/gen_tables.pl script instead.
+ *
+ * Created by gen_tables.pl jrieks 
+ *  Overview:
+ *     This file contains various charset tables.
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+
+#if !defined(PARROT_CHARSET_TABLES_H_GUARD)
+#define PARROT_CHARSET_TABLES_H_GUARD
+#include "parrot/cclass.h"
+#define WHITESPACE  enum_cclass_whitespace
+#define WORDCHAR    enum_cclass_word
+#define PUNCTUATION enum_cclass_punctuation
+#define DIGIT       enum_cclass_numeric
+extern const PARROT_CCLASS_FLAGS Parrot_ascii_typetable[256];
+extern const PARROT_CCLASS_FLAGS Parrot_iso_8859_1_typetable[256];
+#endif /* PARROT_CHARSET_TABLES_H_GUARD */

Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c     (original)
+++ trunk/charset/unicode.c     Sun May  8 14:24:53 2005
@@ -277,6 +277,27 @@
     return 0;
 }
 
+static INTVAL
+is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset)
+{
+    UNIMPL;
+    return 0;
+}
+
+static INTVAL
+find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    UNIMPL;
+    return -1;
+}
+
+static INTVAL
+find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*source_string, UINTVAL offset, UINTVAL count)
+{
+    UNIMPL;
+    return -1;
+}
+
 static STRING *
 string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
 {
@@ -333,6 +354,9 @@
         mixed_cs_index,
         cs_rindex,
         validate,
+        is_cclass,
+        find_cclass,
+        find_not_cclass,
         is_wordchar,
         find_wordchar,
         find_not_wordchar,

Modified: trunk/config/gen/parrot_include.pl
==============================================================================
--- trunk/config/gen/parrot_include.pl  (original)
+++ trunk/config/gen/parrot_include.pl  Sun May  8 14:24:53 2005
@@ -20,6 +20,7 @@
 $description="Generating runtime/parrot/include...";
 
 my @files = qw(
+    include/parrot/cclass.h
     include/parrot/core_pmcs.h
     include/parrot/datatypes.h
     include/parrot/enums.h

Added: trunk/include/parrot/cclass.h
==============================================================================
--- (empty file)
+++ trunk/include/parrot/cclass.h       Sun May  8 14:24:53 2005
@@ -0,0 +1,32 @@
+/* cclass.h
+*
+* $Id: cclass.h jrieks $
+*
+*   Parrot character classes
+*/
+
+#if !defined(PARROT_CCLASS_H_GUARD)
+#define PARROT_CCLASS_H_GUARD
+
+/* &gen_from_enum(cclass.pasm) subst(s/enum_cclass_(\w+)/uc("CCLASS_$1")/e) */
+typedef enum {                         /* ASCII characters matching this 
class: */
+enum_cclass_any = 0x0000,              /* all */
+enum_cclass_none = 0xffff,             /* none */
+enum_cclass_uppercase = 0x0001,                /* A-Z */
+enum_cclass_lowercase = 0x0002,                /* a-z */
+enum_cclass_alphabetic = 0x0004,       /* a-z, A-Z */
+enum_cclass_numeric = 0x0008,          /* 0-9 */
+enum_cclass_hexadecimal = 0x0010,      /* 0-9, a-f, A-F */
+enum_cclass_whitespace = 0x0020,       /* ' ', '\f', '\n', '\r', '\t', '\v' */
+enum_cclass_printing = 0x0040,         /* any printable character including 
space */
+enum_cclass_graphical = 0x0080,                /* any printable character 
except space */
+enum_cclass_blank = 0x0100,            /* ' ', '\t' */
+enum_cclass_control = 0x0200,          /* control characters */
+enum_cclass_punctuation = 0x0400,      /* all except ' ', a-z, A-Z, 0-9 */
+enum_cclass_alphanumeric = 0x0800,      /* a-z, A-Z, 0-9 */
+enum_cclass_newline = 0x1000,           /* '\n', '\r' */
+enum_cclass_word = 0x2000,             /* a-z, A-Z, 0-9, '_'*/
+} PARROT_CCLASS_FLAGS;
+/* &end_gen */
+
+#endif /* PARROT_CCLASS_H_GUARD */

Modified: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h      (original)
+++ trunk/include/parrot/charset.h      Sun May  8 14:24:53 2005
@@ -15,6 +15,7 @@
 
 
 #include "parrot/encoding.h"
+#include "parrot/cclass.h"
 
 struct _charset;
 typedef struct _charset CHARSET;
@@ -55,6 +56,9 @@
 typedef INTVAL (*charset_index_t)(Interp *, STRING *source_string, STRING 
*search_string, UINTVAL offset);
 typedef INTVAL (*charset_rindex_t)(Interp *, STRING *source_string, STRING 
*search_string, UINTVAL offset);
 typedef UINTVAL (*charset_validate_t)(Interp *, STRING *source_string);
+typedef INTVAL (*charset_is_cclass_t)(Interp *, PARROT_CCLASS_FLAGS, STRING 
*source_string, UINTVAL offset);
+typedef INTVAL (*charset_find_cclass_t)(Interp *, PARROT_CCLASS_FLAGS, STRING 
*source_string, UINTVAL offset, UINTVAL count);
+typedef INTVAL (*charset_find_not_cclass_t)(Interp *, PARROT_CCLASS_FLAGS, 
STRING *source_string, UINTVAL offset, UINTVAL count);
 typedef INTVAL (*charset_is_wordchar_t)(Interp *, STRING *source_string, 
UINTVAL offset);
 typedef INTVAL (*charset_find_wordchar_t)(Interp *, STRING *source_string, 
UINTVAL offset);
 typedef INTVAL (*charset_find_not_wordchar_t)(Interp *, STRING *source_string, 
UINTVAL offset);
@@ -115,6 +119,9 @@
     charset_index_t index;
     charset_rindex_t rindex;
     charset_validate_t validate;
+    charset_is_cclass_t is_cclass;
+    charset_find_cclass_t find_cclass;
+    charset_find_not_cclass_t find_not_cclass;
     charset_is_wordchar_t is_wordchar;
     charset_find_wordchar_t find_wordchar;
     charset_find_not_wordchar_t find_not_wordchar;
@@ -153,6 +160,9 @@
 #define CHARSET_INDEX(interp, source, search, offset) ((CHARSET 
*)source->charset)->index(interpreter, source, search, offset)
 #define CHARSET_RINDEX(interp, source, search, offset) ((CHARSET 
*)source->charset)->rindex(interpreter, source, search, offset)
 #define CHARSET_VALIDATE(interp, source, offset) ((CHARSET 
*)source->charset)->validate(interpreter, source)
+#define CHARSET_IS_CCLASS(interp, flags, source, offset) ((CHARSET 
*)source->charset)->is_cclass(interpreter, flags, source, offset)
+#define CHARSET_FIND_CCLASS(interp, flags, source, offset, count) ((CHARSET 
*)source->charset)->find_cclass(interpreter, flags, source, offset, count)
+#define CHARSET_FIND_NOT_CCLASS(interp, flags, source, offset, count) 
((CHARSET *)source->charset)->find_not_cclass(interpreter, flags, source, 
offset, count)
 #define CHARSET_IS_WORDCHAR(interp, source, offset) ((CHARSET 
*)source->charset)->is_wordchar(interpreter, source, offset)
 #define CHARSET_FIND_WORDCHAR(interp, source, offset) ((CHARSET 
*)source->charset)->find_wordchar(interpreter, source, offset)
 #define CHARSET_FIND_NOT_WORDCHAR(interp, source, offset) ((CHARSET 
*)source->charset)->find_not_wordchar(interpreter, source, offset)

Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h (original)
+++ trunk/include/parrot/string_funcs.h Sun May  8 14:24:53 2005
@@ -102,6 +102,9 @@
 void string_downcase_inplace(Interp *, STRING *);
 void string_titlecase_inplace(Interp *, STRING *);
 
+INTVAL Parrot_string_is_cclass(Interp *, PARROT_CCLASS_FLAGS, STRING *, 
UINTVAL offset);
+INTVAL Parrot_string_find_cclass(Interp *, PARROT_CCLASS_FLAGS, STRING *, 
UINTVAL offset, UINTVAL count);
+INTVAL Parrot_string_find_not_cclass(Interp *, PARROT_CCLASS_FLAGS, STRING *, 
UINTVAL offset, UINTVAL count);
 INTVAL Parrot_string_is_whitespace(Interp *, STRING *, INTVAL offset);
 INTVAL Parrot_string_is_digit(Interp *, STRING *, INTVAL offset);
 INTVAL Parrot_string_is_wordchar(Interp *, STRING *, INTVAL offset);

Modified: trunk/ops/experimental.ops
==============================================================================
--- trunk/ops/experimental.ops  (original)
+++ trunk/ops/experimental.ops  Sun May  8 14:24:53 2005
@@ -246,6 +246,45 @@
   goto NEXT();
 }
 
+=item B<is_cclass>(out INT, in INT, in STR, in INT)
+
+Set $1 to 1 if the codepoint of $3 at position $4 is in
+the character class(es) given by $2.
+
+=cut
+
+inline op is_cclass(out INT, in INT, in STR, in INT) {
+  $1 = Parrot_string_is_cclass(interpreter, $2, $3, $4);
+  goto NEXT();
+}
+
+=item B<find_cclass>(out INT, in INT, in STR, in INT, in INT)
+
+Set $1 to the offset of the first codepoint matching
+the character class(es) given by $2 in string $3, starting
+at offset $4 for up to $5 codepoints.  If no matching
+character is found, set $1 to -1.
+
+=cut
+
+inline op find_cclass(out INT, in INT, in STR, in INT, in INT) {
+  $1 = Parrot_string_find_cclass(interpreter, $2, $3, $4, $5);
+  goto NEXT();
+}
+
+=item B<find_not_cclass>(out INT, in INT, in STR, in INT, in INT)
+
+Set $1 to the offset of the first codepoint not matching
+the character class(es) given by $2 in string $3, starting
+at offset $4 for up to $5 codepoints.  If the substring
+consists entirely of matching characters, set $1 to -1.
+
+=cut
+
+inline op find_not_cclass(out INT, in INT, in STR, in INT, in INT) {
+  $1 = Parrot_string_find_not_cclass(interpreter, $2, $3, $4, $5);
+  goto NEXT();
+}
 
 =back
 

Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Sun May  8 14:24:53 2005
@@ -2645,6 +2645,30 @@
     return CHARSET_FIND_WORD_BOUNDARY(interpreter, s, offset);
 }
 
+INTVAL
+Parrot_string_is_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, STRING 
*s, UINTVAL offset)
+{
+    if (!s)
+        return -1;
+    return CHARSET_IS_CCLASS(interpreter, flags, s, offset);
+}
+
+INTVAL
+Parrot_string_find_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, 
STRING *s, UINTVAL offset, UINTVAL count)
+{
+    if (!s)
+        return -1;
+    return CHARSET_FIND_CCLASS(interpreter, flags, s, offset, count);
+}
+
+INTVAL
+Parrot_string_find_not_cclass(Interp *interpreter, PARROT_CCLASS_FLAGS flags, 
STRING *s, UINTVAL offset, UINTVAL count)
+{
+    if (!s)
+        return -1;
+    return CHARSET_FIND_NOT_CCLASS(interpreter, flags, s, offset, count);
+}
+
 STRING*
 Parrot_string_trans_charset(Interp *interpreter, STRING *src,
         INTVAL charset_nr, STRING *dest)

Added: trunk/t/op/string_cclass.t
==============================================================================
--- (empty file)
+++ trunk/t/op/string_cclass.t  Sun May  8 14:24:53 2005
@@ -0,0 +1,100 @@
+#! perl -w
+# Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
+# $Id: cclass.t jrieks $
+
+=head1 NAME
+
+t/op/string_cclass.t - character class tests
+
+=head1 SYNOPSIS
+
+       % perl -Ilib t/op/string_cclass.t
+
+=head1 DESCRIPTION
+
+Tests find_cclass, find_not_cclass, is_cclass.
+
+=cut
+
+use strict;
+
+use Parrot::Test tests => 1;
+
+pir_output_is(<<'CODE', <<'OUT', "is_cclass");
+.include "cclass.pasm"
+.sub main @MAIN
+    $S1 = ascii:"ab\nCX34.\0 \t!"
+    test1( $S1 )
+    $S1 = iso-8859-1:"ab\nCX34.\0 \t!"
+    test1( $S1 )
+.end
+.sub test1
+    .param string str
+    test2( str, .CCLASS_UPPERCASE)
+    test2( str, .CCLASS_LOWERCASE)
+    test2( str, .CCLASS_ALPHABETIC)
+    test2( str, .CCLASS_NUMERIC)
+    test2( str, .CCLASS_HEXADECIMAL)
+    test2( str, .CCLASS_WHITESPACE)
+    test2( str, .CCLASS_PRINTING)
+    test2( str, .CCLASS_GRAPHICAL)
+    test2( str, .CCLASS_BLANK)
+    test2( str, .CCLASS_CONTROL)
+    test2( str, .CCLASS_PUNCTUATION)
+    test2( str, .CCLASS_ALPHANUMERIC)
+    test2( str, .CCLASS_NEWLINE)
+    
+    $I0 = .CCLASS_NEWLINE|.CCLASS_WHITESPACE
+    test2( str, $I0)
+    $I0 = .CCLASS_WHITESPACE|.CCLASS_LOWERCASE
+    test2( str, $I0)
+    $I0 = .CCLASS_UPPERCASE|.CCLASS_PUNCTUATION
+    test2( str, $I0)
+.end
+.sub test2
+    .param string str
+    .param int code
+
+    $I1 = length str
+    set $I0, 0
+loop:
+    $I2 = is_cclass code, str, $I0
+    print $I2
+    inc $I0
+    if $I0 < $I1 goto loop
+    print "\n"
+.end
+CODE
+000110000000
+110000000000
+110110000000
+000001100000
+110101100000
+001000000110
+110111110101
+110111110001
+000000000110
+001000001010
+000000010001
+110111100000
+001000000000
+001000000110
+111000000110
+000110010001
+000110000000
+110000000000
+110110000000
+000001100000
+110101100000
+001000000110
+110111110101
+110111110001
+000000000110
+001000001010
+000000010001
+110111100000
+001000000000
+001000000110
+111000000110
+000110010001
+OUT

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Sun May  8 14:24:53 2005
@@ -93,7 +93,7 @@
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "is_whitespace");
-    set S0, iso-8859-1:"a\t\n \xa0"
+    set S0, iso-8859-1:"a\t\n \xa0" # is 0xa0 a whitespace in iso-8859-1??
     is_whitespace I0, S0, 0
     is_whitespace I1, S0, 1
     is_whitespace I2, S0, 2
@@ -111,15 +111,17 @@
     is_whitespace I1, S0, 1
     is_whitespace I2, S0, 2
     is_whitespace I3, S0, 3
+    is_whitespace I4, S0, 4 # access past string boundary: not a whitespace
     print I0
     print I1
     print I2
     print I3
+    print I4
     print "\n"
     end
 CODE
-01111
-0111
+01110
+01110
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "is_wordchar");
@@ -164,7 +166,7 @@
     print "\n"
     end
 CODE
-000001110
+000001111
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "is_newline");
@@ -226,7 +228,7 @@
     print "ok\n"
     end
 CODE
-2 5 -1 ok
+0 2 5 -1 ok
 OUTPUT
 
 output_is( <<'CODE', <<OUTPUT, "find_word_boundary");

Reply via email to