Author: pmichaud
Date: Fri Apr 21 10:22:18 2006
New Revision: 12389

Modified:
   branches/pge-pm/compilers/pge/PGE/OPTable.pir
   branches/pge-pm/t/compilers/pge/03-optable.t

Log:
[PGE]:
* Updated OPTable to handle close tokens better, including missing or mismatched close tokens.
* Improved comments a bit (more to come).


Modified: branches/pge-pm/compilers/pge/PGE/OPTable.pir
==============================================================================
--- branches/pge-pm/compilers/pge/PGE/OPTable.pir       (original)
+++ branches/pge-pm/compilers/pge/PGE/OPTable.pir       Fri Apr 21 10:22:18 2006
@@ -152,7 +152,7 @@
     if $I0 goto end
     tokentable[name] = token
 
-    ## don't process undef syntactic categories -- just store
+    ##   don't process undef syntactic categories -- just store
     if mode == 0 goto end
 
     $S0 = args['match']
@@ -266,8 +266,6 @@
 
 .sub "parse" :method
     .param pmc mob
-    .param pmc stoptoken       :optional
-    .param int has_stoptoken   :opt_flag
     .local pmc tokentable, keytable, klentable
     .local pmc tokenstack, operstack, termstack
     .local pmc newfrom
@@ -281,9 +279,7 @@
     .local pmc iter
     .local int tokenmode, topmode
     .local int tokencat, topcat
-    .local int arity
     .local int lastcat
-    .local int circumnest 
 
     tokentable = self
     keytable = getattribute self, "PGE::OPTable\x0%!key"
@@ -298,12 +294,6 @@
     pos = mfrom
     lastpos = length target
     lastcat = PGE_OPTABLE_EMPTY
-    circumnest = 0
-
-    ## if an empty stoptoken was sent, pretend we didn't get one
-    if has_stoptoken == 0 goto expect_term
-    if stoptoken > '' goto expect_term
-    has_stoptoken = 0
 
   expect_term:
     expect = PGE_OPTABLE_EXPECT_TERM
@@ -317,10 +307,12 @@
     expect = PGE_OPTABLE_EXPECT_OPER
 
   token_next:
-    ## Figure out what we're looking for
+    ##   figure out what we're looking for
+    ##   if we're at the end of the string, end match
     wspos = pos
     if pos >= lastpos goto oper_not_found
-    if_null ws, token_next_ws
+    ##   check for leading whitespace -- it may limit token candidates
+    if null ws goto token_next_ws
     mpos = pos
     $P0 = ws(mob)
     unless $P0 goto token_next_1
@@ -330,10 +322,13 @@
     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
   token_next_1:
     nows = 0
+    ##   "nows" tokens are eligible if we don't have leading ws
     if pos == wspos goto key_search
     nows = PGE_OPTABLE_NOWS
 
+  ## look through eligible tokens to find longest match
   key_search:
+    ##   use the next character of input stream to limit search
     key = substr target, pos, 1
     $I0 = klentable[key]
     key = substr target, pos, $I0
@@ -363,34 +358,33 @@
     goto key_loop
   token_nows:
     if pos == wspos goto oper_not_found
+    ##   try again, with the whitespace operators this time
     pos = wspos
     nows = 0
     goto key_search
 
   oper_not_found:
+    ##   we were unable to find a valid token for the current expect state
+    ##   if we're not expecting a term, then end the match here
     $I0 = expect & PGE_OPTABLE_EXPECT_TERM
     if $I0 == 0 goto end
+    ##   otherwise, let's add a "dummy" term to the stack for reduction
     (oper, $S0, $P0, $P1) = newfrom(mob, pos, "PGE::Match")
     push termstack, oper
+    ##   if the current operator doesn't allow nullterm, end match
     unless tokenstack goto end
     top = tokenstack[-1]
     topmode = top["mode"]
     $I0 = topmode & PGE_OPTABLE_NULLTERM
     if $I0 == 0 goto end
+    ##   it's a nullterm operator, so we can continue parsing
     $P1 = pos
     goto expect_oper
 
   oper_found:
-    ## if we're at a stop token, end the parse here
-    if circumnest > 0 goto oper_valid
-    if has_stoptoken == 0 goto oper_valid
-    $P0 = token['name']
-    if $P0 == stoptoken goto oper_not_found
-
-  oper_valid:
     tokenmode = token["mode"]
     tokencat = tokenmode & PGE_OPTABLE_SYNCAT
-    ## this hack handles prelist term followed by postcircumfix op
+    ##   this hack handles prelist term followed by postcircumfix op
     if lastcat != PGE_OPTABLE_PRELIST goto oper_found_1
     if tokencat != PGE_OPTABLE_POSTCIRCUMFIX goto oper_found_1
     $P0 = pop tokenstack
@@ -398,32 +392,30 @@
     push termstack, $P0
   oper_found_1:
     lastcat = tokencat
+    ##   the remainder of this section processes according to the
+    ##   table at the end of this function
     if tokencat == PGE_OPTABLE_TERM goto term_shift
     if tokencat == PGE_OPTABLE_PREFIX goto oper_shift          # (S1)
     if tokencat == PGE_OPTABLE_CIRCUMFIX goto oper_shift       # (S2)
 
-    ## Check that we already have a term
     $I0 = elements termstack                                 
     if $I0 > 0 goto shift_reduce
     if tokencat != PGE_OPTABLE_PRELIST goto end
 
   ## The shift/reduce loop
   shift_reduce:
-    ## If the token stack is empty, shift
     $I0 = elements tokenstack
     if $I0 > 0 goto shift_reduce_1
     if tokencat == PGE_OPTABLE_CLOSE goto end                  # (E3)
     topcat = PGE_OPTABLE_EMPTY
     goto oper_shift                                            # (S3)
   shift_reduce_1:
-    ## Compare with token at top of stack
     top = tokenstack[-1]
     topmode = top["mode"]
     topcat = topmode & PGE_OPTABLE_SYNCAT
     if topcat == PGE_OPTABLE_POSTFIX goto oper_reduce          # (R4)
     if tokencat == PGE_OPTABLE_CLOSE goto oper_close           # (R5, C5)
     if topcat >= PGE_OPTABLE_POSTCIRCUMFIX goto oper_shift     # (S6)
-    ## Check operator precedence
     $P0 = token['precedence']
     $P1 = top['prec_close']
     if $P0 > $P1 goto oper_shift                               # (P)
@@ -440,17 +432,19 @@
     goto shift_reduce
 
   oper_close:
+    ##   if the top operator isn't a circumfix, reduce it
+    ##   if the close token doesn't match circumfix close, end here
+    ##   else shift (fall-through)
     if topcat < PGE_OPTABLE_TERNARY goto oper_reduce           # (R5)
-    $S0 = top["keyclose"]
-    if key != $S0 goto end                                     # (C5)
+    $S0 = top['keyclose']
+    if key != $S0 goto end
 
   oper_shift:
+    ##   shift operator onto the operator stack
     push tokenstack, token
     push operstack, oper
-    if tokencat < PGE_OPTABLE_POSTCIRCUMFIX goto oper_shift_1
-    inc circumnest
-  oper_shift_1:
     pos = oper.to()
+    ##   choose next expect state based on current state
     if tokencat == PGE_OPTABLE_PRELIST goto expect_termpost
     if tokencat >= PGE_OPTABLE_PREFIX goto expect_term
     if tokencat == PGE_OPTABLE_POSTFIX goto expect_oper
@@ -465,13 +459,24 @@
   ## reduce top operation on stack
   reduce:
     $P0 = pop tokenstack
+    $P1 = pop operstack
     topmode = $P0["mode"]
     topcat = topmode & PGE_OPTABLE_SYNCAT
-    if topcat != PGE_OPTABLE_CLOSE goto reduce_1
+    if topcat == PGE_OPTABLE_CLOSE goto reduce_close
+    if topcat < PGE_OPTABLE_POSTCIRCUMFIX goto reduce_normal
+    ##   we have an unbalanced open, so error.  remove the
+    ##   incomplete circumfixed term, and for circumfix: opers 
+    ##   put a failed nullterm onto the termstack
+    wspos = -1
+    $P0 = pop termstack
+    if topcat != PGE_OPTABLE_CIRCUMFIX goto reduce_end
+    (oper, $S0, $P0, $P1) = newfrom(mob, pos, "PGE::Match")
+    goto reduce_end
+  reduce_close:
     $P0 = pop tokenstack
     $P1 = pop operstack
-  reduce_1:
-    $P1 = pop operstack
+  reduce_normal:
+    .local int arity
     topmode = $P0["mode"]
     arity = topmode & PGE_OPTABLE_ARITY
   reduce_args:
@@ -482,11 +487,12 @@
     $P1[arity] = $P2
     goto reduce_args
   reduce_backtrack:
-    wspos = $P1.from()
-    if arity > 0 goto reduce_end
+    wspos = -1
+    if arity > 0 goto end
     push termstack, $P2
     goto reduce_end
   reduce_list:
+    ##   combine matching list associative operations
     $I0 = topmode & PGE_OPTABLE_ASSOC
     if $I0 != PGE_OPTABLE_ASSOC_LIST goto reduce_saveterm
     $S1 = $P1['type']
@@ -499,9 +505,6 @@
   reduce_saveterm:
     push termstack, $P1
   reduce_end:
-    if topcat < PGE_OPTABLE_POSTCIRCUMFIX goto reduce_end_1
-    dec circumnest
-  reduce_end_1:
     ret
 
   token_match:
@@ -537,15 +540,29 @@
     bsr reduce
     goto end
   end_1:
+    mpos = -1
+    ##   if the termstack is empty, fail the match
+    ##   if the term is an invalid term, fail the match
     $I0 = elements termstack
     if $I0 < 1 goto end_2
     $P0 = pop termstack
     unless $P0 goto end_2
     mob["expr"] = $P0
     mpos = wspos
-    .return (mob)
+    if wspos > 0 goto end_2
+    ##   somewhere we encountered an error that caused us to backtrack
+    ##   find the "real" ending position here
+  end_1a:
+    $I0 = $P0.to()
+    if $I0 <= wspos goto end_1b
+    wspos = $I0
+    mpos = $I0
+  end_1b:
+    $P0 = $P0[0]
+    if null $P0 goto end_2
+    $I0 = isa $P0, 'PGE::Match'
+    if $I0 goto end_1a
   end_2:
-    mpos = -1
     .return (mob)
 
   err_ternary:

Modified: branches/pge-pm/t/compilers/pge/03-optable.t
==============================================================================
--- branches/pge-pm/t/compilers/pge/03-optable.t        (original)
+++ branches/pge-pm/t/compilers/pge/03-optable.t        Fri Apr 21 10:22:18 2006
@@ -53,10 +53,8 @@
     'list associativity');
 
 optable_output_is('a b', 'term:a (pos=1)', 'two terms in sequence');
-optable_output_is('a = = b', 'term:a (pos=1)', 'two opers in sequence',
-    todo => 'fix end position');
-optable_output_is('a +', 'term:a (pos=1)', 'infix missing rhs',
-    todo => 'fix end position');
+optable_output_is('a = = b', 'term:a (pos=1)', 'two opers in sequence');
+optable_output_is('a +', 'term:a (pos=1)', 'infix missing rhs');
 
 optable_output_is('a++', 'postfix:++(term:a)', 'postfix');
 optable_output_is('a--', 'postfix:--(term:a)', 'postfix');
@@ -70,10 +68,8 @@
   'infix:+(infix:*(term:a, term:b), term:c) (pos=5)',
   'extra close paren');
 optable_output_is('  )a*b+c)+4', 'failed', 'only close paren');
-optable_output_is('(a*b+c', 'failed', 'missing close paren',
-  todo => 'fix close tokens');
-optable_output_is('(a*b+c]', 'failed', 'mismatch close paren',
-  todo => 'fix close tokens');
+optable_output_is('(a*b+c', 'failed', 'missing close paren');
+optable_output_is('(a*b+c]', 'failed', 'mismatch close paren');
 
 
 optable_output_is('a+++--b',

Reply via email to