Author: pmichaud
Date: Sat May 7 22:39:51 2005
New Revision: 8005
Modified:
trunk/compilers/pge/PGE/Exp.pir
trunk/compilers/pge/PGE/Match.pir
trunk/compilers/pge/PGE/P6Rule.pir
Log:
Added <?ws> and :w to rules.
Fixed cut semantics for nested groups.
Fixed rule and subrule calling conventions.
Added subrules and aliased captures.
Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir (original)
+++ trunk/compilers/pge/PGE/Exp.pir Sat May 7 22:39:51 2005
@@ -14,6 +14,7 @@
PGE::Scalar - match a scalar
PGE::Dot - match any character
PGE::CCShortcut - character class shortcuts (\d, \D, \w, etc.)
+ PGE::WS - <?ws> rule
PGE::Anchor - matching of ^, ^^, $, $$, \b, \B anchors
PGE::Cut - :: and :::
PGE::Concat - concatenation of expressions
@@ -43,6 +44,7 @@
$P0 = subclass expclass, "PGE::Exp::Scalar"
$P0 = subclass expclass, "PGE::Exp::Dot"
$P0 = subclass expclass, "PGE::Exp::CCShortcut"
+ $P0 = subclass expclass, "PGE::Exp::WS"
$P0 = subclass expclass, "PGE::Exp::Anchor"
$P0 = subclass expclass, "PGE::Exp::Cut"
$P0 = subclass expclass, "PGE::Exp::Concat"
@@ -284,7 +286,7 @@
emit(code, " restore %s", str1)
emitsub_2:
unless docut goto end
- emit(code, " if cutting > 0 goto fail")
+ emit(code, " if cutting != 0 goto fail")
end:
.end
@@ -378,11 +380,17 @@
emit(code, ".sub _pge_rule")
emit(code, " .param string target")
emit(code, " .param int pos")
+ emit(code, " .param int lastpos")
emit(code, " .local pmc mob")
+ emit(code, " unless argcI < 2 goto rule_1")
+ emit(code, " lastpos = length target")
+ emit(code, " rule_1:")
+ emit(code, " unless argcI < 1 goto rule_2")
+ emit(code, " pos = -1")
+ emit(code, " rule_2:")
emit(code, " newsub $P0, .Coroutine, _pge_rule_coroutine")
emit(code, " $P1 = find_global \"PGE::Match\", \"start\"")
- emit(code, " (mob) = $P1(target, $P0)")
- emit(code, " .return (mob)")
+ emit(code, " .return $P1(target, $P0, pos, lastpos)")
emit(code, ".end")
emit(code, "")
emit(code, ".sub _pge_rule_coroutine")
@@ -401,12 +409,11 @@
emit(code, " .local int iscreator")
emit(code, " gpad = new PerlArray")
emit(code, " cpad = new PerlArray")
+ emit(code, " push gpad, -1")
emit(code, " push cpad, mob")
emit(code, " from = getattribute mob, \"PGE::Match\\x0$:from\"")
emit(code, " cutting = 0")
- emit(code, " unless argcI > 1 goto setpos")
- emit(code, " lastpos = length target")
- emit(code, " setpos:")
+ emit(code, " if pos >= 0 goto try_at_pos")
emit(code, " pos = 0")
emit(code, " try_match:")
emit(code, " if pos > lastpos goto fail_forever")
@@ -419,11 +426,14 @@
emit(code, " if $I0 < 0 goto try_again")
gen_1:
emit(code, " from = pos")
- self.emitsub(code, label, "pos", 0)
- emit(code, " if cutting > 1 goto fail_forever")
+ self.emitsub(code, label, "pos", "from", 0)
+ emit(code, " if cutting != 0 goto fail_forever")
emit(code, " try_again:")
emit(code, " inc pos")
emit(code, " goto try_match")
+ emit(code, " try_at_pos:")
+ emit(code, " from = pos")
+ self.emitsub(code, label, 0)
emit(code, " fail_forever:")
emit(code, " .yield(-2)")
emit(code, " goto fail_forever")
@@ -630,6 +640,38 @@
emit(code, " goto fail")
.end
+.namespace [ "PGE::Exp::WS" ]
+
+.sub "gen" method # PGE::Exp::WS: emit the PIR that implements the <?ws> rule
+ .param pmc code # passed through to emit(); receives the generated lines
+ .param string label # label emitted at the entry of this expression
+ .param string next # label the generated code branches to on success
+ .local pmc emit
+ emit = find_global "PGE::Exp", "emit"
+ emit(code, "\n %s: # <?ws>", label)
+ emit(code, " rep = 0") # rep counts the whitespace characters consumed so far
+ emit(code, " if pos >= lastpos goto %s", next) # at or past lastpos: succeed without consuming
+ emit(code, " if pos < 1 goto %s_1", label) # no previous character, so no boundary check
+ emit(code, " $I0 = is_wordchar target, pos")
+ emit(code, " unless $I0 goto %s_1", label)
+ emit(code, " $I0 = pos - 1")
+ emit(code, " $I0 = is_wordchar target, $I0")
+ emit(code, " if $I0 goto fail") # word characters on both sides of pos: fail
+ emit(code, " %s_1:", label) # scan loop: consume whitespace one character at a time
+ emit(code, " if pos >= lastpos goto %s_2", label)
+ emit(code, " $I0 = is_whitespace target, pos")
+ emit(code, " unless $I0 goto %s_2", label)
+ emit(code, " inc rep")
+ emit(code, " inc pos")
+ emit(code, " goto %s_1", label) # loop on the scan label; jumping to the entry label would reset rep to 0
+ emit(code, " %s_2:", label) # try the continuation, giving back one character per retry
+ emit(code, " if rep == 0 goto %s", next) # nothing left to give back: plain branch to next
+ self.emitsub(code, next, "pos", "rep") # presumably preserves pos and rep around the attempt at next -- confirm in emitsub
+ emit(code, " dec rep")
+ emit(code, " dec pos")
+ emit(code, " goto %s_2", label)
+.end
+
.namespace [ "PGE::Exp::Anchor" ]
@@ -680,7 +722,7 @@
word_1:
emit(code, " if $I0 == $I1 goto %s", next)
end:
- emit(code, "goto fail")
+ emit(code, " goto fail")
.end
@@ -727,16 +769,16 @@
.param string next
.param string token
.local pmc emit
- .local int cutting
+ .local string cutting
token = self["token"]
- cutting = 1 # :: cut alternation
+ cutting = "gpad[-1]" # :: cut alternation
unless token == ":::" goto cut_1 # ::: cut rule
- cutting = 2
+ cutting = "-1"
cut_1:
emit = find_global "PGE::Exp", "emit"
emit(code, "\n %s:", label)
- self.emitsub(code, next)
- emit(code, " cutting = %d", cutting)
+ self.emitsub(code, next, 0)
+ emit(code, " cutting = %s", cutting)
emit(code, " goto fail")
.end
@@ -919,7 +961,7 @@
emit(code, " delete cobcapt[%s]", captname)
emit(code, " %s_i4:", label)
init_2:
- emit(code, " if cutting != 1 goto fail")
+ emit(code, " unless cutting == %d goto fail", myserno)
emit(code, " cutting = 0")
emit(code, " goto fail")
emit(code, " %s_1:", label)
@@ -938,9 +980,6 @@
emit(code, " inc rep")
emit(code, " gpad[-2] = rep")
self.emitsub(code, sublabel, "pos", "rep")
- unless iscut goto greedy_1
- emit(code, " $I0 = gpad[-2]")
- emit(code, " if $I0 < 0 goto fail")
greedy_1:
emit(code, " dec rep")
emit(code, " %s_g1:", label)
@@ -952,8 +991,11 @@
self.emitsub(code, next, "capt", "rep", "$P0", 0)
emit(code, " push cpad, $P0")
emit(code, " push gpad, capt")
- emit(code, " push gpad, -1")
+ emit(code, " push gpad, rep")
emit(code, " push gpad, %d", myserno)
+ unless iscut goto greedy_2
+ emit(code, " cutting = %d", myserno)
+ greedy_2:
emit(code, " goto fail")
goto subpat
lazy:
@@ -968,6 +1010,7 @@
emit(code, " push gpad, rep")
emit(code, " push gpad, %d", myserno)
unless iscut goto lazy_1
+ emit(code, " cutting = %d", myserno)
emit(code, " goto fail")
lazy_1:
emit(code, " %s_l1:", label)
@@ -1006,20 +1049,20 @@
goto end
subrule:
emit(code, " %s:", sublabel)
- emit(code, " $P1 = find_global '%s'", rname)
+ emit(code, " $P1 = find_name '%s'", rname)
emit(code, " saveall")
- emit(code, " $P0 = $P1(target)")
+ emit(code, " $P0 = $P1(target, pos, lastpos)")
emit(code, " pos = $P0.to()")
- emit(code, " save $P0")
emit(code, " save pos")
+ emit(code, " save $P0")
emit(code, " restoreall")
- emit(code, " restore pos")
emit(code, " restore $P0")
+ emit(code, " restore pos")
emit(code, " unless $P0 goto %s_s4", label)
emit(code, " push capt, $P0")
emit(code, " %s_s2:", label)
self.emitsub(code, label, "pos", "$P0", 0)
- emit(code, " if cutting > 0 goto %s_s3", label)
+ emit(code, " unless cutting == 0 goto %s_s3", label)
emit(code, " saveall")
emit(code, " $P0.next()")
emit(code, " pos = $P0.to()")
Modified: trunk/compilers/pge/PGE/Match.pir
==============================================================================
--- trunk/compilers/pge/PGE/Match.pir (original)
+++ trunk/compilers/pge/PGE/Match.pir Sat May 7 22:39:51 2005
@@ -34,16 +34,17 @@
.sub "start" # build a PGE::Match object for target, attach the coroutine, and invoke it once
.param string target # target
.param pmc yield # coroutine
+ .param int pos # position forwarded to the coroutine as its starting point
+ .param int lastpos # position bound forwarded to the coroutine (supplied by the caller)
.local pmc me # newly created match obj
.local int offset # offset for attributes
- .local int lastpos # length of target
$P0 = new String
$P0 = target
$I0 = find_type "PGE::Match"
me = new $I0, $P0
setattribute me, "PGE::Match\x0&:yield", yield
- yield(me, target, 0, lastpos) # start match
+ yield(me, target, pos, lastpos) # start match, passing along the caller's pos and lastpos
.return (me)
.end
@@ -159,7 +160,7 @@
.return ("")
.end
-=item C<__get_pmc_keyed_integer(INT key)>
+=item C<__get_pmc_keyed_int(INT key)>
Returns the subpattern capture associated with key. Note that
this will return either a single Match object or an array of
@@ -178,7 +179,28 @@
end:
.return ($P1)
.end
-
+
+=item C<__get_pmc_keyed_str(STR key)>
+
+Returns the subrule capture associated with C<key>. Note that
+this can return either a single Match object or an array of
+Match objects depending on the rule.
+
+=cut
+
+.sub "__get_pmc_keyed_str" method # string-keyed lookup into this match's %:capt attribute
+ .param string key # capture name; this is the STR-keyed accessor, so the key must be a string, not an int
+ .local pmc capt
+ capt = getattribute self, "PGE::Match\x0%:capt"
+ $P0 = capt[key] # entry stored under key
+ $P1 = getprop "isarray", $P0 # "isarray" property attached to that entry
+ unless $P1 goto end
+ $P1 = $P1[-1] # NOTE(review): this indexes the property value rather than $P0 -- confirm intent
+ end:
+ .return ($P1)
+.end
+
+
=item C<dump()>
Produces a data dump of the match object and all of its subcaptures.
Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir Sat May 7 22:39:51 2005
@@ -65,9 +65,9 @@
p6meta['$7'] = $P0
p6meta['$8'] = $P0
p6meta['$9'] = $P0
- # $P0 = find_global "PGE::P6Rule", "p6rule_parse_assert" # XXX: TODO
- # p6meta['<'] = $P0
- # p6meta['>'] = u
+ $P0 = find_global "PGE::P6Rule", "p6rule_parse_subrule" # XXX: TODO
+ p6meta['<'] = $P0
+ p6meta['>'] = u
$P0 = find_global "PGE::P6Rule", "p6rule_parse_charclass"
p6meta['\d'] = $P0
p6meta['\D'] = $P0
@@ -202,6 +202,8 @@
lit = ''
literal:
+ $I0 = lex["ws"]
+ if $I0 goto literal_end
pos = lex["pos"]
c = substr pattern, pos, 1 # get current character
if c == "\\" goto isslashmeta # possibly a \ escape
@@ -322,13 +324,13 @@
.return (exp)
.end
-=item C<p6rule_parse_assert(STR pattern, PMC lex)>
+=item C<p6rule_parse_subrule(STR pattern, PMC lex)>
-Parses an assertion (such as a subrule).
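+Parses a subrule reference of the form C<< <name> >> and returns a PGE::Exp::Group for it; the capture name defaults to the subrule name unless lex["cname"] is set.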
=cut
-.sub p6rule_parse_assert
+.sub p6rule_parse_subrule
.param string pattern
.param pmc lex
.param string token
@@ -337,29 +339,36 @@
p6rule_parse_skip(pattern, lex, 1)
pos = lex["pos"]
$I0 = pos
- assert_1:
+ subrule_1:
$I1 = is_wordchar pattern, pos
- unless $I1 goto assert_2
+ unless $I1 goto subrule_2
inc pos
- goto assert_1
- assert_2:
+ goto subrule_1
+ subrule_2:
$I1 = pos - $I0
- if $I1 > 0 goto assert_3
+ if $I1 > 0 goto subrule_3
p6rule_parse_error(pattern, lex, "invalid subrule name")
- assert_3:
+ subrule_3:
$P0 = find_global "PGE::Exp", "new"
$P1 = $P0("PGE::Exp")
exp = $P0("PGE::Exp::Group", $P1)
$S0 = substr pattern, $I0, $I1
exp["rname"] = $S0
exp["cname"] = $S0
+ $I0 = exists lex["cname"]
+ unless $I0 goto subrule_4
+ $P0 = lex["cname"]
+ exp["cname"] = $P0
+ subrule_4:
$S0 = substr pattern, pos, 1
- if $S0 == '>' goto assert_4
+ if $S0 == '>' goto subrule_5
p6rule_parse_error(pattern, lex, "missing closing '>'")
- assert_4:
+ goto subrule_6
+ subrule_5:
inc pos
- $I1 = pos - $I0
- p6rule_parse_skip(pattern, lex, $I1)
+ lex["pos"] = pos
+ p6rule_parse_skip(pattern, lex, 0)
+ subrule_6:
.return (exp)
.end
@@ -376,7 +385,6 @@
.local int pos, plen
.local int subp
.local pmc exp
-
pos = lex["pos"] # get current position
inc pos # skip past '$'
@@ -618,6 +626,7 @@
Parse a concatenated sequence of rule expressions, terminated
by a closing group character, an alternation, or a conjunction.
+When the "words" flag is set in lex (the :w modifier), <?ws> rules are also generated here wherever lex["ws"] indicates pending whitespace.
XXX: We need to add an option here to allow other characters to
terminate the expression.
@@ -628,17 +637,30 @@
.param pmc lex
.local pmc exp
.local pmc p6meta
+ .local int words
+ $P0 = find_global "PGE::Exp", "new"
+ words = lex["words"]
+ unless words goto concat_1
+ $I0 = lex["ws"]
+ unless $I0 goto concat_1
+ (exp) = $P0("PGE::Exp::WS")
+ lex["ws"] = 0
+ goto concat_2
+ concat_1:
(exp) = "p6rule_parse_quant"(pattern, lex)
+ unless words goto concat_2
+ $I0 = lex["ws"]
+ if $I0 goto concat_3
+ concat_2:
$I0 = lex["pos"]
$S0 = substr pattern, $I0, 1
if $S0 == '' goto end
$I0 = index "])|&", $S0
if $I0 >= 0 goto end
- concat:
- ($P0) = "p6rule_parse_concat"(pattern, lex)
- $P1 = find_global "PGE::Exp", "new"
- (exp) = $P1("PGE::Exp::Concat", exp, $P0)
+ concat_3:
+ ($P1) = "p6rule_parse_concat"(pattern, lex)
+ (exp) = $P0("PGE::Exp::Concat", exp, $P1)
end:
.return (exp)
.end
@@ -688,9 +710,21 @@
.sub "p6rule_parse_exp" # parse one expression, honoring a leading ':w' modifier
.param string pattern
.param pmc lex
+ .local int words # value of lex["words"] on entry
+ .local int pos
.local pmc exp
+ words = lex["words"] # remember the incoming setting so it can be restored below
+ $I0 = lex["ws"]
+ if $I0 goto exp_1 # lex["ws"] is set: skip the modifier check
+ pos = lex["pos"]
+ $S0 = substr pattern, pos, 2 # look at the next two characters of the pattern
+ unless $S0 == ':w' goto exp_1 # XXX: only does ':w'
+ lex["words"] = 1 # turn the "words" flag on for the expression parsed below
+ p6rule_parse_skip(pattern, lex, 2) # presumably advances past the two-character ':w' -- confirm in p6rule_parse_skip
+ exp_1:
(exp) = "p6rule_parse_alt"(pattern, lex)
+ lex["words"] = words # restore the flag to its value on entry
.return (exp)
.end
@@ -717,6 +751,8 @@
lex["subp"] = 0
$I0 = length pattern
lex["plen"] = $I0
+ $P0 = find_global "PGE::P6Rule", "p6rule_parse_skip"
+ $P0(pattern, lex, 0)
$P0 = find_global "PGE::P6Rule", "p6rule_parse_exp"
(exp) = $P0(pattern, lex)