Author: pmichaud
Date: Fri Nov  4 09:32:27 2005
New Revision: 9778

Modified:
   trunk/charset/gen_tables.pl
   trunk/charset/tables.c
   trunk/compilers/pge/PGE/Exp.pir
   trunk/compilers/pge/PGE/P6Rule.pir
   trunk/examples/pge/all.pir
   trunk/t/p6rules/metachars.t
Log:
* Added correct interpretation for \n and \N metacharacters.
* Updated cclass tables to include \x0c and \x85 in the .CCLASS_NEWLINE class.
* Removed todo designations from passing tests.


Modified: trunk/charset/gen_tables.pl
==============================================================================
--- trunk/charset/gen_tables.pl (original)
+++ trunk/charset/gen_tables.pl Fri Nov  4 09:32:27 2005
@@ -58,7 +58,7 @@ sub classify {
     $ret |= 0x0200 if $chr =~ /^[[:cntrl:]]$/;  # CCLASS_CONTROL        
     $ret |= 0x0400 if $chr =~ /^[[:punct:]]$/;  # CCLASS_PUNCTUATION    
     $ret |= 0x0800 if $chr =~ /^[[:alnum:]]$/;  # CCLASS_ALPHANUMERIC   
-    $ret |= 0x1000 if $chr =~ /^[\n\r]$/;       # CCLASS_NEWLINE
+    $ret |= 0x1000 if $chr =~ /^[\n\r\f\x85]$/; # CCLASS_NEWLINE
     $ret |= 0x2000 if $chr =~ /^[[:alnum:]_]$/; # CCLASS_WORD
 
     return $ret;

Modified: trunk/charset/tables.c
==============================================================================
--- trunk/charset/tables.c      (original)
+++ trunk/charset/tables.c      Fri Nov  4 09:32:27 2005
@@ -16,7 +16,7 @@
 #include "tables.h"
 const PARROT_CCLASS_FLAGS Parrot_ascii_typetable[256] = {
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
 0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
@@ -31,7 +31,7 @@ const PARROT_CCLASS_FLAGS Parrot_ascii_t
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
 0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0020, 0x0000, 0x0000, /* 128-135 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1020, 0x0000, 0x0000, /* 128-135 */
 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
@@ -50,7 +50,7 @@ const PARROT_CCLASS_FLAGS Parrot_ascii_t
 };
 const PARROT_CCLASS_FLAGS Parrot_iso_8859_1_typetable[256] = {
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
 0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
@@ -65,7 +65,7 @@ const PARROT_CCLASS_FLAGS Parrot_iso_885
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
 0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0220, 0x0200, 0x0200, /* 128-135 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x1220, 0x0200, 0x0200, /* 128-135 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 136-143 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 144-151 */
 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 152-159 */

Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir     (original)
+++ trunk/compilers/pge/PGE/Exp.pir     Fri Nov  4 09:32:27 2005
@@ -465,6 +465,13 @@ register.
 
 .namespace [ "PGE::Exp::CCShortcut" ]
 
+.sub "reduce" :method                  # special-cases the "\n" shortcut node; other tokens are left unchanged
+    $S0 = self["value"]                # fetch this node's "value" entry as a string
+    if $S0 != "\\n" goto end           # anything other than backslash-n skips the flag below
+    self["isquant"] = 1                # NOTE(review): meaning of "isquant" is defined outside this hunk -- confirm
+  end:
+.end
+    
 .sub "gen" :method
     .param pmc code
     .param string label
@@ -477,17 +484,16 @@ register.
     (min, max, islazy, iscut, $S0) = self."quant"()
     emit = find_global "PGE::Exp", "emit"
     emit(code, "\n %s:  # %s %s", label, token, $S0)
+    if token == "\\n" goto newline
     if token == "." goto dot
     find = "    $I0 = find_not_cclass %s, target, pos, lastpos"
     if token == "\\s" goto space
     if token == "\\d" goto digit
     if token == "\\w" goto word
-    if token == "\\n" goto newline
     find = "    $I0 = find_cclass %s, target, pos, lastpos"
     if token == "\\S" goto space
     if token == "\\D" goto digit
     if token == "\\W" goto word
-    if token == "\\N" goto newline
   dot:
     emit(code, "    $I0 = lastpos")
     goto char
@@ -500,9 +506,6 @@ register.
   word:
     emit(code, find, .CCLASS_WORD)
     goto char
-  newline:
-    emit(code, find, .CCLASS_NEWLINE)
-    goto char
   char:
     emit(code, "    rep = $I0 - pos")
     emit(code, "    if rep < %s goto fail", min)
@@ -539,6 +542,15 @@ register.
   cut:
     emit(code, "    goto %s", next)
     .return ()
+  newline:                                         # single newline
+    emit(code, "    $I0 = is_cclass %s, target, pos", .CCLASS_NEWLINE)
+    emit(code, "    if $I0 == 0 goto fail")
+    emit(code, "    $S0 = substr target, pos, 2")
+    emit(code, "    inc pos")
+    emit(code, "    if $S0 != \"\\r\\n\" goto %s", next)
+    emit(code, "    inc pos")
+    emit(code, "    goto %s", next)
+    .return ()
 .end
 
 

Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir  (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir  Fri Nov  4 09:32:27 2005
@@ -34,7 +34,6 @@
     optable.addtok("term:\\w", "term:", "nows", "PGE::Exp::CCShortcut")
     optable.addtok("term:\\W", "term:", "nows", "PGE::Exp::CCShortcut")
     optable.addtok("term:\\n", "term:", "nows", "PGE::Exp::CCShortcut")
-    optable.addtok("term:\\N", "term:", "nows", "PGE::Exp::CCShortcut")
 
     optable.addtok("circumfix:[ ]", "term:", "nows", "PGE::Exp::Group")
     optable.addtok("circumfix:( )", "term:", "nows", "PGE::Exp::Group")
@@ -80,6 +79,7 @@
     $P0["t"] = "\t"
     $P0["v"] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
     $P0["h"] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
+    $P0["n"] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
     # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
 .end
 

Modified: trunk/examples/pge/all.pir
==============================================================================
--- trunk/examples/pge/all.pir  (original)
+++ trunk/examples/pge/all.pir  Fri Nov  4 09:32:27 2005
@@ -13,6 +13,7 @@
     load_bytecode "PGE/Dumper.pir"
     load_bytecode "PGE/Util.pir"
 
+    print "loaded p6rule\n"
     "loadexpr"("Simple", "expr")
     "loadexpr"("Perl6", "expr")
 

Modified: trunk/t/p6rules/metachars.t
==============================================================================
--- trunk/t/p6rules/metachars.t (original)
+++ trunk/t/p6rules/metachars.t Fri Nov  4 09:32:27 2005
@@ -47,8 +47,8 @@ p6rule_is  ("abc\012def", 'c \n d', 'log
 p6rule_is  ("abc\015def", 'c \n d', 'logical newline (\n)');
 p6rule_is  ("abc\n\ndef", 'c \n+ d', 'logical newline (\n)');
 p6rule_isnt('abcdef', 'a\n+f', 'logical newline (\n)');
-p6rule_is  ("abc\012\015def", 'c \n d', 'logical newline (\n)', todo => 'specification unclear');
-p6rule_is  ("abc\015\012def", 'c \n d', 'logical newline (\n)', todo => 'specification unclear');
+p6rule_isnt("abc\012\015def", 'c \n d', 'logical newline (\n)');
+p6rule_is  ("abc\015\012def", 'c \n d', 'logical newline (\n)');
 p6rule_isnt("abc\ndef", 'b \n c', 'logical newline (\n)');
 p6rule_is  ("a", '\N', 'not logical newline (\N)');
 p6rule_is  ("abc", 'a \N c', 'not logical newline (\N)');
@@ -58,8 +58,8 @@ p6rule_isnt("abc\012def", 'c \N d', 'not
 p6rule_isnt("abc\015def", 'c \N d', 'not logical newline (\N)');
 p6rule_isnt("abc\n\ndef", 'c \N+ d', 'not logical newline (\N)');
 p6rule_is  ('abcdef', 'a\N+f', 'not logical newline (\N)');
-p6rule_is  ("abc\012\015def", 'c \N d', 'not logical newline (\N)', todo => 'specification unclear');
-p6rule_is  ("abc\015\012def", 'c \N d', 'not logical newline (\N)', todo => 'specification unclear');
+p6rule_isnt("abc\012\015def", 'c \N d', 'not logical newline (\N)');
+p6rule_isnt("abc\015\012def", 'c \N d', 'not logical newline (\N)');
 p6rule_is  ("abc\ndef", 'b \N \n', 'not logical newline (\N)');
 
 

Reply via email to