Author: pmichaud
Date: Thu Nov 3 06:47:05 2005
New Revision: 9741
Modified:
trunk/compilers/pge/PGE/Exp.pir
trunk/compilers/pge/PGE/P6Rule.pir
Log:
* Modified literal and charlist PIR code generation
* Support for \e, \f, \r, \t, \v, \h, and their negated uppercase counterparts (\E, \F, \R, ...)
* Preliminary unicode support
Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir (original)
+++ trunk/compilers/pge/PGE/Exp.pir Thu Nov 3 06:47:05 2005
@@ -170,72 +170,76 @@ register.
=cut
-.sub "genliteral" :method
+# genfixedstr(code, label, next, testcode, fixstr, fixlen)
+#
+# Emit PIR that matches a fixed-width item at the current position,
+# honoring the quantifier returned by self."quant"().
+#   code     - passed as the first argument to every emit() call
+#   label    - prefix for the labels emitted here (_g1/_g2, _l1/_l2)
+#   next     - label the emitted code jumps to after a successful match
+#   testcode - emit() format string for the comparison; it is emitted
+#              with fixstr and the failure label as its two arguments
+#   fixstr   - text substituted into testcode
+#   fixlen   - text emitted as the substr length and the pos increment
+#              (either a constant or the name of a register)
+.sub "genfixedstr" :method
.param pmc code
.param string label
.param string next
+ .param string testcode
+ .param string fixstr
+ .param string fixlen
.local pmc emit
.local int min, max, islazy, iscut, ignorecase
+ .local string testlabel
(min, max, islazy, iscut, $S0) = self."quant"()
ignorecase = self["ignorecase"]
emit = find_global "PGE::Exp", "emit"
- emit(code, " litlen = length lit")
- unless min == 1 goto quant
- unless max == 1 goto quant
- emit(code, " $S0 = substr target, pos, litlen")
- if ignorecase == 0 goto init_1
- emit(code, " downcase $S0")
- init_1:
- emit(code, " if $S0 != lit goto fail")
- emit(code, " pos += litlen")
+ # testlabel is where the emitted test branches on a mismatch
+ testlabel = "fail"
+ if min != max goto quant
+ # an exact repeat count leaves nothing to backtrack into
+ iscut = 1
+ if max != 1 goto quant
+ # exactly one occurrence: a single test, then continue
+ bsr test
emit(code, " goto %s", next)
- .return()
+ .return ()
quant:
emit(code, " rep = 0")
if islazy goto lazy
greedy:
- emit(code, " %s_lit1:", label)
+ # greedy: a mismatch ends the loop at <label>_g2 instead of failing
+ testlabel = concat label, "_g2"
+ emit(code, " %s_g1:", label)
if max == PGE_INF goto greedy_1
- emit(code, " if rep >= %s goto %s_lit2", max, label)
+ emit(code, " if rep >= %s goto %s", max, testlabel)
greedy_1:
- emit(code, " $S0 = substr target, pos, litlen")
- emit(code, " if $S0 != lit goto %s_lit2", label)
+ bsr test
emit(code, " inc rep")
- emit(code, " pos += litlen")
- emit(code, " goto %s_lit1", label)
- emit(code, " %s_lit2:", label)
+ emit(code, " goto %s_g1", label)
+ emit(code, " %s:", testlabel)
emit(code, " if rep < %s goto fail", min)
- if iscut goto greedy_cut
+ if iscut goto cut
emit(code, " if rep == %s goto %s", min, next)
- self.emitsub(code, next, "pos", "rep", "litlen")
+ self.emitsub(code, next, "pos", "rep", "$I1")
emit(code, " dec rep")
- emit(code, " pos -= litlen")
- emit(code, " goto %s_lit2", label)
- .return ()
- greedy_cut:
- emit(code, " goto %s", next)
+ emit(code, " pos -= %s", fixlen)
+ emit(code, " goto %s", testlabel)
.return ()
lazy:
- emit(code, " %s_lit1:", label)
- if min < 1 goto lazy_1
- emit(code, " if rep < %s goto %s_lit2", min, label)
+ # only a minimum of zero may skip the first test and try "next" at once
+ if min > 0 goto lazy_1
+ emit(code, " goto %s_l2", label)
lazy_1:
- if iscut == 0 goto lazy_2
- emit(code, " goto %s", next)
- goto lazy_4
- lazy_2:
- if max == PGE_INF goto lazy_3
+ emit(code, " %s_l1:", label)
+ bsr test
+ # count the occurrence just matched so the min/max checks can progress
+ emit(code, " inc rep")
+ emit(code, " if rep < %s goto %s_l1", min, label)
+ emit(code, " %s_l2:", label)
+ if iscut goto cut
+ if max == PGE_INF goto lazy_2
emit(code, " if rep >= %s goto %s", max, next)
- lazy_3:
- self.emitsub(code, next, "pos", "rep", "lit", "litlen")
+ lazy_2:
+ self.emitsub(code, next, "pos", "rep", "$I1", "$S1")
lazy_4:
- emit(code, " %s_lit2:", label)
- emit(code, " $S0 = substr target, pos, litlen")
- emit(code, " if $S0 != lit goto fail")
- emit(code, " inc rep")
- emit(code, " pos += litlen")
- emit(code, " goto %s_lit1", label)
+ emit(code, " goto %s_l1", label)
.return ()
+ cut:
+ emit(code, " goto %s", next)
+ .return ()
+
+ # local subroutine (reached with bsr, left with ret): emits one test of
+ # the item at pos, branching to testlabel on end-of-input or mismatch
+ # and advancing pos by fixlen otherwise
+ test:
+ emit(code, " if pos >= lastpos goto %s", testlabel)
+ emit(code, " $S0 = substr target, pos, %s", fixlen)
+ if ignorecase == 0 goto test_1
+ emit(code, " downcase $S0")
+ test_1:
+ emit(code, testcode, fixstr, testlabel)
+ emit(code, " pos += %s", fixlen)
+ ret
.end
.sub "gencapture" :method
@@ -383,20 +387,51 @@ register.
.param string next
.local pmc emit
($I0, $I1, $I2, $I3, $S0) = self."quant"()
- $I0 = self["ignorecase"]
emit = find_global "PGE::Exp", "emit"
$S1 = self["value"]
- if $I0 == 0 goto init_1
+ $I0 = self["ignorecase"]
+ if $I0 == 0 goto lit_1
downcase $S1
- init_1:
+ lit_1:
+ $I1 = length $S1
$P0 = find_global "Data::Escape", "String"
$S1 = $P0($S1, '"')
emit(code, "\n %s: # literal %s ##", label, $S0)
- emit(code, " lit = \"%s\"", $S1)
- self.genliteral(code, label, next)
+ $S0 = " if $S0 != unicode:\"%s\" goto %s"
+ self.genfixedstr(code, label, next, $S0, $S1, $I1)
+ .return ()
+.end
+
+.namespace [ "PGE::Exp::EnumCharList" ]
+
+# Generate matching code for an enumerated character list.  The list is
+# read from self["value"] (downcased when self["ignorecase"] is set) and
+# escaped with Data::Escape's String before being placed in the emitted
+# test.  self["isnegated"] selects whether finding the target character in
+# the list counts as a failure or as a match.  Quantifier handling is
+# delegated to genfixedstr with a fixed width of 1.
+.sub "gen" :method
+ .param pmc code
+ .param string label
+ .param string next
+ .local pmc emit
+ ($I0, $I1, $I2, $I3, $S0) = self."quant"()
+ emit = find_global "PGE::Exp", "emit"
+ $S1 = self["value"]
+ $I0 = self["ignorecase"]
+ if $I0 == 0 goto charlist_1
+ downcase $S1
+ charlist_1:
+ emit(code, "\n %s: # charclass %s ##", label, $S0)
+ $P0 = find_global "Data::Escape", "String"
+ $S1 = $P0($S1, '"')
+ # start with the non-negated test: branch away when the char is absent
+ $S0 = " $I0 = index unicode:\"%s\", $S0\n if $I0 < 0 goto %s"
+ $I0 = self["isnegated"]
+ if $I0 == 0 goto charlist_2
+ # negated list: branch away when the char is present
+ $S0 = " $I0 = index unicode:\"%s\", $S0\n if $I0 >= 0 goto %s"
+ charlist_2:
+ self.genfixedstr(code, label, next, $S0, $S1, 1)
+ .return ()
+.end
+
.namespace [ "PGE::Exp::Scalar" ]
.sub "gen" method
@@ -414,8 +449,10 @@ register.
emit(code, " if $I0 == 0 goto %s_0", label)
emit(code, " $P0 = $P0[-1]")
emit(code, " %s_0:", label)
- emit(code, " lit = $P0")
- self.genliteral(code, label, next)
+ emit(code, " $S1 = $P0")
+ emit(code, " $I1 = length $S1")
+ $S0 = " if $S0 != %s goto %s"
+ self."genfixedstr"(code, label, next, $S0, "$S1", "$I1")
.return ()
.end
@@ -498,66 +535,6 @@ register.
.return ()
.end
-.namespace [ "PGE::Exp::EnumCharList" ]
-
-.sub "gen" :method
- .param pmc code
- .param string label
- .param string next
- .local string charlist, charmatch
- .local int min, max, islazy, iscut
- .local pmc emit
- (min, max, islazy, iscut, $S0) = self."quant"()
- emit = find_global "PGE::Exp", "emit"
- $P0 = find_global "Data::Escape", "String"
- charlist = self["charlist"]
- charlist = $P0(charlist, '"')
- charmatch = self["charmatch"]
- emit(code, "\n %s: # enumchars %s %s ##", label, charlist, $S0)
- emit(code, " rep = 0")
- if islazy goto lazy
- emit(code, " %s_1:", label)
- emit(code, " if pos >= lastpos goto %s_2", label)
- emit(code, " if rep >= %s goto %s_2", max, label)
- emit(code, " $S0 = substr target, pos, 1")
- emit(code, " $I0 = index \"%s\", $S0", charlist)
- emit(code, " %s $I0 == -1 goto %s_2", charmatch, label)
- emit(code, " inc pos")
- emit(code, " inc rep")
- emit(code, " goto %s_1", label)
- emit(code, " %s_2:", label)
- emit(code, " if rep < %s goto fail", min)
- if iscut goto cut
- emit(code, " if rep == %s goto %s", min, next)
- self.emitsub(code, next, "pos", "rep")
- emit(code, " dec pos")
- emit(code, " dec rep")
- emit(code, " goto %s_2", label)
- .return ()
- lazy:
- emit(code, " %s_0:", label)
- emit(code, " if rep < %s goto %s_1", min, label)
- unless iscut goto lazy_1
- emit(code, " goto %s", next)
- goto lazy_2
- lazy_1:
- emit(code, " if rep >= %s goto %s", max, next)
- emit(code, " if pos > lastpos goto fail")
- self.emitsub(code, next, "pos", "rep")
- lazy_2:
- emit(code, " %s_1:", label)
- emit(code, " $S0 = substr target, pos, 1")
- emit(code, " $I0 = index \"%s\", $S0", charlist)
- emit(code, " %s $I0 == -1 goto fail", charmatch)
- emit(code, " inc rep")
- emit(code, " inc pos")
- emit(code, " goto %s_0", label)
- .return ()
- cut:
- emit(code, " goto %s", next)
- .return ()
-.end
-
.namespace [ "PGE::Exp::Concat" ]
Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir Thu Nov 3 06:47:05 2005
@@ -67,6 +67,16 @@
optable.addtok("prefix::", "<infix:|", "nows", $P0)
optable.addtok("close:>", "<prefix::", "nows")
+
+ $P0 = new Hash
+ store_global "PGE::P6Rule", "%escape", $P0
+ $P0["e"] = "\e"
+ $P0["f"] = "\f"
+ $P0["r"] = "\r"
+ $P0["t"] = "\t"
+ $P0["v"] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
+ $P0["h"] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
+ # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
.end
@@ -76,6 +86,7 @@
.local string target
.local int pos, lastpos
.local int litstart, litlen
+ .local string initchar
newfrom = find_global "PGE::Match", "newfrom"
$P0 = getattribute mob, "PGE::Match\x0$:target"
target = $P0
@@ -85,18 +96,33 @@
$I0 = is_cclass .CCLASS_WHITESPACE, target, pos
if $I0 goto term_ws
- $S0 = substr target, pos, 1
- if $S0 == '#' goto term_ws
+ initchar = substr target, pos, 1
+ inc pos
+ if initchar == "#" goto term_ws
+ if initchar != "\\" goto term_literal
+
+ term_backslash:
+ initchar = substr target, pos, 1
+ $I1 = is_cclass .CCLASS_UPPERCASE, target, pos
+ inc pos
+ $S0 = downcase initchar
+ $P0 = find_global "PGE::P6Rule", "%escape"
+ $I0 = exists $P0[$S0] # \e\f\h\r\t\v etc...
+ if $I0 == 0 goto term_literal
+ initchar = $P0[$S0]
+ if $I1 goto term_charlist # negated escapes
+ $I0 = length initchar
+ if $I0 < 2 goto term_literal
+ term_charlist:
+ mob = newfrom(mob, 0, "PGE::Exp::EnumCharList")
+ mob["value"] = initchar
+ mob["isnegated"] = $I1
+ goto end
- term_literal:
+ term_literal: # first char is in initchar
mob = newfrom(mob, 0, "PGE::Exp::Literal")
litstart = pos
litlen = 0
- $S0 = substr target, pos, 1
- inc pos
- if $S0 != "\\" goto term_literal_loop
- inc litstart
- inc pos
term_literal_loop:
if pos >= lastpos goto term_literal_end
$I0 = is_cclass .CCLASS_WHITESPACE, target, pos
@@ -108,11 +134,12 @@
inc litlen
goto term_literal_loop
term_literal_end:
- if litlen < 2 goto term_literal_one
+ if litlen < 1 goto term_literal_one
dec pos
term_literal_one:
$I0 = pos - litstart
$S0 = substr target, litstart, $I0
+ $S0 = concat initchar, $S0
mob["value"] = $S0
goto end
@@ -335,13 +362,13 @@
(mob, target, mfrom, mpos) = $P0(mob, 0, "PGE::Exp::EnumCharList")
lastpos = length target
charlist = ""
- mob["charmatch"] = "if"
+ mob["isnegated"] = 0
pos = mfrom
isrange = 0
$S0 = substr target, pos, 3
pos += 2
if $S0 != "<-[" goto scan
- mob["charmatch"] = "unless"
+ mob["isnegated"] = 1
inc pos
scan:
if pos >= lastpos goto err_close
@@ -383,7 +410,7 @@
if $S0 != "]>" goto err_bracket
pos += 2
mpos = pos
- mob["charlist"] = charlist
+ mob["value"] = charlist
goto end
err_bracket:
parse_error(mob, pos, "Unescaped ']' in charlist")