Author: pmichaud
Date: Sat May  7 22:39:51 2005
New Revision: 8005

Modified:
   trunk/compilers/pge/PGE/Exp.pir
   trunk/compilers/pge/PGE/Match.pir
   trunk/compilers/pge/PGE/P6Rule.pir
Log:
Added <?ws> and :w to rules.
Fixed cut semantics for nested groups.
Fixed rule and subrule calling conventions.
Added subrules and aliased captures.



Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir     (original)
+++ trunk/compilers/pge/PGE/Exp.pir     Sat May  7 22:39:51 2005
@@ -14,6 +14,7 @@
     PGE::Scalar    - match a scalar
     PGE::Dot       - match any character
     PGE::CCShortcut - character class shortcuts (\d, \D, \w, etc.)
+    PGE::WS        - <?ws> rule
     PGE::Anchor    - matching of ^, ^^, $, $$, \b, \B anchors
     PGE::Cut       - :: and :::
     PGE::Concat    - concatenation of expressions
@@ -43,6 +44,7 @@
     $P0 = subclass expclass, "PGE::Exp::Scalar"
     $P0 = subclass expclass, "PGE::Exp::Dot"
     $P0 = subclass expclass, "PGE::Exp::CCShortcut"
+    $P0 = subclass expclass, "PGE::Exp::WS"
     $P0 = subclass expclass, "PGE::Exp::Anchor"
     $P0 = subclass expclass, "PGE::Exp::Cut"
     $P0 = subclass expclass, "PGE::Exp::Concat"
@@ -284,7 +286,7 @@
     emit(code, "    restore %s", str1)
   emitsub_2:
     unless docut goto end
-    emit(code, "    if cutting > 0 goto fail")
+    emit(code, "    if cutting != 0 goto fail")
   end:
 .end
 
@@ -378,11 +380,17 @@
     emit(code, ".sub _pge_rule")
     emit(code, "    .param string target")
     emit(code, "    .param int pos")
+    emit(code, "    .param int lastpos")
     emit(code, "    .local pmc mob")
+    emit(code, "    unless argcI < 2 goto rule_1")
+    emit(code, "    lastpos = length target")
+    emit(code, "  rule_1:")
+    emit(code, "    unless argcI < 1 goto rule_2")
+    emit(code, "    pos = -1")
+    emit(code, "  rule_2:")
     emit(code, "    newsub $P0, .Coroutine, _pge_rule_coroutine")
     emit(code, "    $P1 = find_global \"PGE::Match\", \"start\"")
-    emit(code, "    (mob) = $P1(target, $P0)")
-    emit(code, "    .return (mob)")
+    emit(code, "    .return $P1(target, $P0, pos, lastpos)")
     emit(code, ".end")
     emit(code, "")
     emit(code, ".sub _pge_rule_coroutine")
@@ -401,12 +409,11 @@
     emit(code, "    .local int iscreator")
     emit(code, "    gpad = new PerlArray")
     emit(code, "    cpad = new PerlArray")
+    emit(code, "    push gpad, -1")
     emit(code, "    push cpad, mob")
     emit(code, "    from = getattribute mob, \"PGE::Match\\x0$:from\"")
     emit(code, "    cutting = 0")
-    emit(code, "    unless argcI > 1 goto setpos")
-    emit(code, "    lastpos = length target")
-    emit(code, "  setpos:")
+    emit(code, "    if pos >= 0 goto try_at_pos")
     emit(code, "    pos = 0")
     emit(code, "  try_match:")
     emit(code, "    if pos > lastpos goto fail_forever")
@@ -419,11 +426,14 @@
     emit(code, "    if $I0 < 0 goto try_again")
   gen_1:
     emit(code, "    from = pos")
-    self.emitsub(code, label, "pos", 0)
-    emit(code, "    if cutting > 1 goto fail_forever")
+    self.emitsub(code, label, "pos", "from", 0)
+    emit(code, "    if cutting != 0 goto fail_forever")
     emit(code, "  try_again:")
     emit(code, "    inc pos")
     emit(code, "    goto try_match")
+    emit(code, "  try_at_pos:")
+    emit(code, "    from = pos")
+    self.emitsub(code, label, 0)
     emit(code, "  fail_forever:")
     emit(code, "    .yield(-2)")
     emit(code, "    goto fail_forever")
@@ -630,6 +640,38 @@
     emit(code, "    goto fail")
 .end
 
+.namespace [ "PGE::Exp::WS" ]
+# <?ws>: fail between two word characters, otherwise match whitespace greedily.
+.sub "gen" method
+    .param pmc code                                # passed to emit as destination of generated code
+    .param string label                            # label naming this node's generated code
+    .param string next                             # label the generated code jumps to on success
+    .local pmc emit
+    emit = find_global "PGE::Exp", "emit"
+    emit(code, "\n  %s: # <?ws>", label)
+    emit(code, "    rep = 0")                                  # rep = whitespace chars consumed
+    emit(code, "    if pos >= lastpos goto %s", next)          # at/after lastpos: succeed at once
+    emit(code, "    if pos < 1 goto %s_1", label)              # no previous char to inspect
+    emit(code, "    $I0 = is_wordchar target, pos")
+    emit(code, "    unless $I0 goto %s_1", label)
+    emit(code, "    $I0 = pos - 1")
+    emit(code, "    $I0 = is_wordchar target, $I0")
+    emit(code, "    if $I0 goto fail")                         # word char on both sides: fail
+    emit(code, "  %s_1:", label)                               # whitespace scanning loop
+    emit(code, "    if pos >= lastpos goto %s_2", label)
+    emit(code, "    $I0 = is_whitespace target, pos")
+    emit(code, "    unless $I0 goto %s_2", label)
+    emit(code, "    inc rep")
+    emit(code, "    inc pos")
+    emit(code, "    goto %s_1", label)                         # loop here, not at entry (entry resets rep)
+    emit(code, "  %s_2:", label)                               # try next, giving back one char per failure
+    emit(code, "    if rep == 0 goto %s", next)                # nothing left to give back
+    self.emitsub(code, next, "pos", "rep")                     # presumably calls next preserving pos/rep
+    emit(code, "    dec rep")
+    emit(code, "    dec pos")
+    emit(code, "    goto %s_2", label)
+.end
+
 
 .namespace [ "PGE::Exp::Anchor" ]
 
@@ -680,7 +722,7 @@
   word_1:
     emit(code, "    if $I0 == $I1 goto %s", next)
   end:
-    emit(code, "goto fail")
+    emit(code, "    goto fail")
 .end
 
 
@@ -727,16 +769,16 @@
     .param string next
     .param string token
     .local pmc emit
-    .local int cutting
+    .local string cutting
     token = self["token"]
-    cutting = 1                                    # :: cut alternation
+    cutting = "gpad[-1]"                           # :: cut alternation
     unless token == ":::" goto cut_1               # ::: cut rule
-    cutting = 2
+    cutting = "-1"
   cut_1:
     emit = find_global "PGE::Exp", "emit"
     emit(code, "\n  %s:", label)
-    self.emitsub(code, next)
-    emit(code, "    cutting = %d", cutting)
+    self.emitsub(code, next, 0)
+    emit(code, "    cutting = %s", cutting)
     emit(code, "    goto fail")
 .end
 
@@ -919,7 +961,7 @@
     emit(code, "    delete cobcapt[%s]", captname)
     emit(code, "  %s_i4:", label)
   init_2:
-    emit(code, "    if cutting != 1 goto fail")
+    emit(code, "    unless cutting == %d goto fail", myserno)
     emit(code, "    cutting = 0")
     emit(code, "    goto fail")
     emit(code, "  %s_1:", label)
@@ -938,9 +980,6 @@
     emit(code, "    inc rep")
     emit(code, "    gpad[-2] = rep")
     self.emitsub(code, sublabel, "pos", "rep")
-    unless iscut goto greedy_1
-    emit(code, "    $I0 = gpad[-2]")
-    emit(code, "    if $I0 < 0 goto fail")
   greedy_1:
     emit(code, "    dec rep")
     emit(code, "  %s_g1:", label)
@@ -952,8 +991,11 @@
     self.emitsub(code, next, "capt", "rep", "$P0", 0)
     emit(code, "    push cpad, $P0")
     emit(code, "    push gpad, capt")
-    emit(code, "    push gpad, -1")
+    emit(code, "    push gpad, rep")
     emit(code, "    push gpad, %d", myserno)
+    unless iscut goto greedy_2
+    emit(code, "    cutting = %d", myserno)
+  greedy_2:
     emit(code, "    goto fail")
     goto subpat
   lazy:
@@ -968,6 +1010,7 @@
     emit(code, "    push gpad, rep")
     emit(code, "    push gpad, %d", myserno)
     unless iscut goto lazy_1
+    emit(code, "    cutting = %d", myserno)
     emit(code, "    goto fail")
   lazy_1:
     emit(code, "  %s_l1:", label)
@@ -1006,20 +1049,20 @@
     goto end
   subrule:
     emit(code, "  %s:", sublabel)
-    emit(code, "    $P1 = find_global '%s'", rname)
+    emit(code, "    $P1 = find_name '%s'", rname)
     emit(code, "    saveall")
-    emit(code, "    $P0 = $P1(target)")
+    emit(code, "    $P0 = $P1(target, pos, lastpos)")
     emit(code, "    pos = $P0.to()")
-    emit(code, "    save $P0")
     emit(code, "    save pos")
+    emit(code, "    save $P0")
     emit(code, "    restoreall")
-    emit(code, "    restore pos")
     emit(code, "    restore $P0")
+    emit(code, "    restore pos")
     emit(code, "    unless $P0 goto %s_s4", label)
     emit(code, "    push capt, $P0")
     emit(code, "  %s_s2:", label)
     self.emitsub(code, label, "pos", "$P0", 0)
-    emit(code, "    if cutting > 0 goto %s_s3", label)
+    emit(code, "    unless cutting == 0 goto %s_s3", label)
     emit(code, "    saveall")
     emit(code, "    $P0.next()")
     emit(code, "    pos = $P0.to()")

Modified: trunk/compilers/pge/PGE/Match.pir
==============================================================================
--- trunk/compilers/pge/PGE/Match.pir   (original)
+++ trunk/compilers/pge/PGE/Match.pir   Sat May  7 22:39:51 2005
@@ -34,16 +34,17 @@
 .sub "start"
     .param string target                           # target
     .param pmc yield                               # coroutine
+    .param int pos                                 # start position, passed through to the coroutine
+    .param int lastpos                             # last position, supplied by caller (no longer computed here)
     .local pmc me                                  # newly created match obj
     .local int offset                              # offset for attributes
-    .local int lastpos                             # length of target
 
     $P0 = new String
     $P0 = target
     $I0 = find_type "PGE::Match"
     me = new $I0, $P0
     setattribute me, "PGE::Match\x0&:yield", yield
-    yield(me, target, 0, lastpos)                  # start match
+    yield(me, target, pos, lastpos)                # start match: first call into the coroutine
     .return (me)
 .end
 
@@ -159,7 +160,7 @@
     .return ("")
 .end
 
-=item C<__get_pmc_keyed_integer(INT key)>
+=item C<__get_pmc_keyed_int(INT key)>
 
 Returns the subpattern capture associated with key.  Note that
 this will return either a single Match object or an array of
@@ -178,7 +179,28 @@
   end:
     .return ($P1)
 .end
-    
+
+=item C<__get_pmc_keyed_str(STR key)>
+
+Returns the subrule capture associated with C<key>.  Note that
+this can return either a single Match object or an array of
+Match objects depending on the rule.
+
+=cut
+
+.sub "__get_pmc_keyed_str" method
+    .param string key                              # capture name (string-keyed access, per the POD)
+    .local pmc capt
+    capt = getattribute self, "PGE::Match\x0%:capt"    # the object's %:capt attribute
+    $P0 = capt[key]                                # capture stored under key
+    $P1 = getprop "isarray", $P0                   # "isarray" property of that capture
+    unless $P1 goto end
+    $P1 = $P1[-1]                                  # NOTE(review): indexes the property, not $P0 -- confirm
+  end:
+    .return ($P1)                                  # NOTE(review): $P0 is never returned -- confirm intent
+.end
+
+
 =item C<dump()>
 
 Produces a data dump of the match object and all of its subcaptures.

Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir  (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir  Sat May  7 22:39:51 2005
@@ -65,9 +65,9 @@
     p6meta['$7'] = $P0
     p6meta['$8'] = $P0
     p6meta['$9'] = $P0
-    # $P0 = find_global "PGE::P6Rule", "p6rule_parse_assert"      # XXX: TODO
-    # p6meta['<'] = $P0
-    # p6meta['>'] = u
+    $P0 = find_global "PGE::P6Rule", "p6rule_parse_subrule"      # XXX: TODO
+    p6meta['<'] = $P0
+    p6meta['>'] = u
     $P0 = find_global "PGE::P6Rule", "p6rule_parse_charclass"
     p6meta['\d'] = $P0
     p6meta['\D'] = $P0
@@ -202,6 +202,8 @@
     lit = ''
 
   literal:
+    $I0 = lex["ws"]
+    if $I0 goto literal_end
     pos = lex["pos"]
     c = substr pattern, pos, 1             # get current character
     if c == "\\" goto isslashmeta          # possibly a \ escape
@@ -322,13 +324,13 @@
     .return (exp)
 .end
 
-=item C<p6rule_parse_assert(STR pattern, PMC lex)>
+=item C<p6rule_parse_subrule(STR pattern, PMC lex)>
 
-Parses an assertion (such as a subrule).
+Parses subrules.
 
 =cut
 
-.sub p6rule_parse_assert
+.sub p6rule_parse_subrule
     .param string pattern
     .param pmc lex
     .param string token
@@ -337,29 +339,36 @@
     p6rule_parse_skip(pattern, lex, 1)
     pos = lex["pos"]
     $I0 = pos
-  assert_1:
+  subrule_1:
     $I1 = is_wordchar pattern, pos 
-    unless $I1 goto assert_2
+    unless $I1 goto subrule_2
     inc pos
-    goto assert_1
-  assert_2:
+    goto subrule_1
+  subrule_2:
     $I1 = pos - $I0
-    if $I1 > 0 goto assert_3
+    if $I1 > 0 goto subrule_3
     p6rule_parse_error(pattern, lex, "invalid subrule name")
-  assert_3:
+  subrule_3:
     $P0 = find_global "PGE::Exp", "new"
     $P1 = $P0("PGE::Exp")
     exp = $P0("PGE::Exp::Group", $P1)
     $S0 = substr pattern, $I0, $I1
     exp["rname"] = $S0
     exp["cname"] = $S0
+    $I0 = exists lex["cname"]
+    unless $I0 goto subrule_4
+    $P0 = lex["cname"]
+    exp["cname"] = $P0
+  subrule_4:
     $S0 = substr pattern, pos, 1
-    if $S0 == '>' goto assert_4
+    if $S0 == '>' goto subrule_5
     p6rule_parse_error(pattern, lex, "missing closing '>'")
-  assert_4:
+    goto subrule_6
+  subrule_5:
     inc pos
-    $I1 = pos - $I0
-    p6rule_parse_skip(pattern, lex, $I1)
+    lex["pos"] = pos
+    p6rule_parse_skip(pattern, lex, 0)
+  subrule_6:
     .return (exp)
 .end
 
@@ -376,7 +385,6 @@
     .local int pos, plen
     .local int subp
     .local pmc exp
-   
 
     pos = lex["pos"]                               # get current position
     inc pos                                        # skip past '$'
@@ -618,6 +626,7 @@
 
 Parse a concatenated sequence of rule expressions, terminated
 by a closing group character, an alternation, or a conjunction.
+We also generate <?ws> rules as needed here.
 XXX: We need to add an option here to allow other characters to
 terminate the expression.
 
@@ -628,17 +637,30 @@
     .param pmc lex
     .local pmc exp
     .local pmc p6meta
+    .local int words
 
+    $P0 = find_global "PGE::Exp", "new"
+    words = lex["words"]
+    unless words goto concat_1
+    $I0 = lex["ws"]
+    unless $I0 goto concat_1
+    (exp) = $P0("PGE::Exp::WS")
+    lex["ws"] = 0
+    goto concat_2
+  concat_1:  
     (exp) = "p6rule_parse_quant"(pattern, lex)
+    unless words goto concat_2
+    $I0 = lex["ws"]
+    if $I0 goto concat_3
+  concat_2:
     $I0 = lex["pos"]
     $S0 = substr pattern, $I0, 1
     if $S0 == '' goto end
     $I0 = index "])|&", $S0
     if $I0 >= 0 goto end
-  concat:
-    ($P0) = "p6rule_parse_concat"(pattern, lex)
-    $P1 = find_global "PGE::Exp", "new"
-    (exp) = $P1("PGE::Exp::Concat", exp, $P0)
+  concat_3:
+    ($P1) = "p6rule_parse_concat"(pattern, lex)
+    (exp) = $P0("PGE::Exp::Concat", exp, $P1)
   end:
     .return (exp)
 .end
@@ -688,9 +710,21 @@
 .sub "p6rule_parse_exp"
     .param string pattern
     .param pmc lex
+    .local int words                               # lex["words"] on entry, restored before return
+    .local int pos                                 # current parse position from lex["pos"]
     .local pmc exp
 
+    words = lex["words"]                           # remember the enclosing setting
+    $I0 = lex["ws"]
+    if $I0 goto exp_1                              # lex["ws"] set: skip the modifier check
+    pos = lex["pos"]
+    $S0 = substr pattern, pos, 2                   # two characters at the current position
+    unless $S0 == ':w' goto exp_1                  # XXX: only does ':w'
+    lex["words"] = 1                               # turn on words mode for this expression
+    p6rule_parse_skip(pattern, lex, 2)             # presumably advances past the 2-char ':w'
+  exp_1:
     (exp) = "p6rule_parse_alt"(pattern, lex)
+    lex["words"] = words                           # put back the value saved on entry
     .return (exp)
 .end
 
@@ -717,6 +751,8 @@
     lex["subp"] = 0
     $I0 = length pattern
     lex["plen"] = $I0
+    $P0 = find_global "PGE::P6Rule", "p6rule_parse_skip"
+    $P0(pattern, lex, 0)
     $P0 = find_global "PGE::P6Rule", "p6rule_parse_exp"
     (exp) = $P0(pattern, lex)
 

Reply via email to