Author: pmichaud
Date: Fri Nov 4 09:32:27 2005
New Revision: 9778
Modified:
trunk/charset/gen_tables.pl
trunk/charset/tables.c
trunk/compilers/pge/PGE/Exp.pir
trunk/compilers/pge/PGE/P6Rule.pir
trunk/examples/pge/all.pir
trunk/t/p6rules/metachars.t
Log:
* Added correct interpretation for \n and \N metacharacters.
* Updated cclass tables to include \x0c and \x85 in the .CCLASS_NEWLINE class
* Removed todo designations from passing tests
Modified: trunk/charset/gen_tables.pl
==============================================================================
--- trunk/charset/gen_tables.pl (original)
+++ trunk/charset/gen_tables.pl Fri Nov 4 09:32:27 2005
@@ -58,7 +58,7 @@ sub classify {
$ret |= 0x0200 if $chr =~ /^[[:cntrl:]]$/; # CCLASS_CONTROL
$ret |= 0x0400 if $chr =~ /^[[:punct:]]$/; # CCLASS_PUNCTUATION
$ret |= 0x0800 if $chr =~ /^[[:alnum:]]$/; # CCLASS_ALPHANUMERIC
- $ret |= 0x1000 if $chr =~ /^[\n\r]$/; # CCLASS_NEWLINE
+ $ret |= 0x1000 if $chr =~ /^[\n\r\f\x85]$/; # CCLASS_NEWLINE
$ret |= 0x2000 if $chr =~ /^[[:alnum:]_]$/; # CCLASS_WORD
return $ret;
Modified: trunk/charset/tables.c
==============================================================================
--- trunk/charset/tables.c (original)
+++ trunk/charset/tables.c Fri Nov 4 09:32:27 2005
@@ -16,7 +16,7 @@
#include "tables.h"
const PARROT_CCLASS_FLAGS Parrot_ascii_typetable[256] = {
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
@@ -31,7 +31,7 @@ const PARROT_CCLASS_FLAGS Parrot_ascii_t
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0020, 0x0000, 0x0000, /* 128-135 */
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1020, 0x0000, 0x0000, /* 128-135 */
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
@@ -50,7 +50,7 @@ const PARROT_CCLASS_FLAGS Parrot_ascii_t
};
const PARROT_CCLASS_FLAGS Parrot_iso_8859_1_typetable[256] = {
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x0220, 0x1220, 0x0200, 0x0200, /* 8-15 */
+0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
@@ -65,7 +65,7 @@ const PARROT_CCLASS_FLAGS Parrot_iso_885
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0220, 0x0200, 0x0200, /* 128-135 */
+0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x1220, 0x0200, 0x0200, /* 128-135 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 136-143 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 144-151 */
0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 152-159 */
Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir (original)
+++ trunk/compilers/pge/PGE/Exp.pir Fri Nov 4 09:32:27 2005
@@ -465,6 +465,13 @@ register.
.namespace [ "PGE::Exp::CCShortcut" ]
+.sub "reduce" :method
+ $S0 = self["value"]
+ if $S0 != "\\n" goto end
+ self["isquant"] = 1
+ end:
+.end
+
.sub "gen" :method
.param pmc code
.param string label
@@ -477,17 +484,16 @@ register.
(min, max, islazy, iscut, $S0) = self."quant"()
emit = find_global "PGE::Exp", "emit"
emit(code, "\n %s: # %s %s", label, token, $S0)
+ if token == "\\n" goto newline
if token == "." goto dot
find = " $I0 = find_not_cclass %s, target, pos, lastpos"
if token == "\\s" goto space
if token == "\\d" goto digit
if token == "\\w" goto word
- if token == "\\n" goto newline
find = " $I0 = find_cclass %s, target, pos, lastpos"
if token == "\\S" goto space
if token == "\\D" goto digit
if token == "\\W" goto word
- if token == "\\N" goto newline
dot:
emit(code, " $I0 = lastpos")
goto char
@@ -500,9 +506,6 @@ register.
word:
emit(code, find, .CCLASS_WORD)
goto char
- newline:
- emit(code, find, .CCLASS_NEWLINE)
- goto char
char:
emit(code, " rep = $I0 - pos")
emit(code, " if rep < %s goto fail", min)
@@ -539,6 +542,15 @@ register.
cut:
emit(code, " goto %s", next)
.return ()
+ newline: # single newline
+ emit(code, " $I0 = is_cclass %s, target, pos", .CCLASS_NEWLINE)
+ emit(code, " if $I0 == 0 goto fail")
+ emit(code, " $S0 = substr target, pos, 2")
+ emit(code, " inc pos")
+ emit(code, " if $S0 != \"\\r\\n\" goto %s", next)
+ emit(code, " inc pos")
+ emit(code, " goto %s", next)
+ .return ()
.end
Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir Fri Nov 4 09:32:27 2005
@@ -34,7 +34,6 @@
optable.addtok("term:\\w", "term:", "nows", "PGE::Exp::CCShortcut")
optable.addtok("term:\\W", "term:", "nows", "PGE::Exp::CCShortcut")
optable.addtok("term:\\n", "term:", "nows", "PGE::Exp::CCShortcut")
- optable.addtok("term:\\N", "term:", "nows", "PGE::Exp::CCShortcut")
optable.addtok("circumfix:[ ]", "term:", "nows", "PGE::Exp::Group")
optable.addtok("circumfix:( )", "term:", "nows", "PGE::Exp::Group")
@@ -80,6 +79,7 @@
$P0["t"] = "\t"
$P0["v"] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
$P0["h"] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
+ $P0["n"] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
# See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
.end
Modified: trunk/examples/pge/all.pir
==============================================================================
--- trunk/examples/pge/all.pir (original)
+++ trunk/examples/pge/all.pir Fri Nov 4 09:32:27 2005
@@ -13,6 +13,7 @@
load_bytecode "PGE/Dumper.pir"
load_bytecode "PGE/Util.pir"
+ print "loaded p6rule\n"
"loadexpr"("Simple", "expr")
"loadexpr"("Perl6", "expr")
Modified: trunk/t/p6rules/metachars.t
==============================================================================
--- trunk/t/p6rules/metachars.t (original)
+++ trunk/t/p6rules/metachars.t Fri Nov 4 09:32:27 2005
@@ -47,8 +47,8 @@ p6rule_is ("abc\012def", 'c \n d', 'log
p6rule_is ("abc\015def", 'c \n d', 'logical newline (\n)');
p6rule_is ("abc\n\ndef", 'c \n+ d', 'logical newline (\n)');
p6rule_isnt('abcdef', 'a\n+f', 'logical newline (\n)');
-p6rule_is ("abc\012\015def", 'c \n d', 'logical newline (\n)', todo => 'specification unclear');
-p6rule_is ("abc\015\012def", 'c \n d', 'logical newline (\n)', todo => 'specification unclear');
+p6rule_isnt("abc\012\015def", 'c \n d', 'logical newline (\n)');
+p6rule_is ("abc\015\012def", 'c \n d', 'logical newline (\n)');
p6rule_isnt("abc\ndef", 'b \n c', 'logical newline (\n)');
p6rule_is ("a", '\N', 'not logical newline (\N)');
p6rule_is ("abc", 'a \N c', 'not logical newline (\N)');
@@ -58,8 +58,8 @@ p6rule_isnt("abc\012def", 'c \N d', 'not
p6rule_isnt("abc\015def", 'c \N d', 'not logical newline (\N)');
p6rule_isnt("abc\n\ndef", 'c \N+ d', 'not logical newline (\N)');
p6rule_is ('abcdef', 'a\N+f', 'not logical newline (\N)');
-p6rule_is ("abc\012\015def", 'c \N d', 'not logical newline (\N)', todo => 'specification unclear');
-p6rule_is ("abc\015\012def", 'c \N d', 'not logical newline (\N)', todo => 'specification unclear');
+p6rule_isnt("abc\012\015def", 'c \N d', 'not logical newline (\N)');
+p6rule_isnt("abc\015\012def", 'c \N d', 'not logical newline (\N)');
p6rule_is ("abc\ndef", 'b \N \n', 'not logical newline (\N)');