Author: pmichaud
Date: Fri Apr 21 10:22:18 2006
New Revision: 12389
Modified:
branches/pge-pm/compilers/pge/PGE/OPTable.pir
branches/pge-pm/t/compilers/pge/03-optable.t
Log:
[PGE]:
* Updated OPTable for better handling of close tokens (and missing closes).
* Improved comments a bit (more to come).
Modified: branches/pge-pm/compilers/pge/PGE/OPTable.pir
==============================================================================
--- branches/pge-pm/compilers/pge/PGE/OPTable.pir (original)
+++ branches/pge-pm/compilers/pge/PGE/OPTable.pir Fri Apr 21 10:22:18 2006
@@ -152,7 +152,7 @@
if $I0 goto end
tokentable[name] = token
- ## don't process undef syntactic categories -- just store
+ ## don't process undef syntactic categories -- just store
if mode == 0 goto end
$S0 = args['match']
@@ -266,8 +266,6 @@
.sub "parse" :method
.param pmc mob
- .param pmc stoptoken :optional
- .param int has_stoptoken :opt_flag
.local pmc tokentable, keytable, klentable
.local pmc tokenstack, operstack, termstack
.local pmc newfrom
@@ -281,9 +279,7 @@
.local pmc iter
.local int tokenmode, topmode
.local int tokencat, topcat
- .local int arity
.local int lastcat
- .local int circumnest
tokentable = self
keytable = getattribute self, "PGE::OPTable\x0%!key"
@@ -298,12 +294,6 @@
pos = mfrom
lastpos = length target
lastcat = PGE_OPTABLE_EMPTY
- circumnest = 0
-
- ## if an empty stoptoken was sent, pretend we didn't get one
- if has_stoptoken == 0 goto expect_term
- if stoptoken > '' goto expect_term
- has_stoptoken = 0
expect_term:
expect = PGE_OPTABLE_EXPECT_TERM
@@ -317,10 +307,12 @@
expect = PGE_OPTABLE_EXPECT_OPER
token_next:
- ## Figure out what we're looking for
+ ## figure out what we're looking for
+ ## if we're at the end of the string, end match
wspos = pos
if pos >= lastpos goto oper_not_found
- if_null ws, token_next_ws
+ ## check for leading whitespace -- it may limit token candidates
+ if null ws goto token_next_ws
mpos = pos
$P0 = ws(mob)
unless $P0 goto token_next_1
@@ -330,10 +322,13 @@
pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
token_next_1:
nows = 0
+ ## "nows" tokens are eligible if we don't have leading ws
if pos == wspos goto key_search
nows = PGE_OPTABLE_NOWS
+ ## look through eligible tokens to find longest match
key_search:
+ ## use the next character of input stream to limit search
key = substr target, pos, 1
$I0 = klentable[key]
key = substr target, pos, $I0
@@ -363,34 +358,33 @@
goto key_loop
token_nows:
if pos == wspos goto oper_not_found
+ ## try again, with the whitespace operators this time
pos = wspos
nows = 0
goto key_search
oper_not_found:
+ ## we were unable to find a valid token for the current expect state
+ ## if we're not expecting a term, then end the match here
$I0 = expect & PGE_OPTABLE_EXPECT_TERM
if $I0 == 0 goto end
+ ## otherwise, let's add a "dummy" term to the stack for reduction
(oper, $S0, $P0, $P1) = newfrom(mob, pos, "PGE::Match")
push termstack, oper
+ ## if the current operator doesn't allow nullterm, end match
unless tokenstack goto end
top = tokenstack[-1]
topmode = top["mode"]
$I0 = topmode & PGE_OPTABLE_NULLTERM
if $I0 == 0 goto end
+ ## it's a nullterm operator, so we can continue parsing
$P1 = pos
goto expect_oper
oper_found:
- ## if we're at a stop token, end the parse here
- if circumnest > 0 goto oper_valid
- if has_stoptoken == 0 goto oper_valid
- $P0 = token['name']
- if $P0 == stoptoken goto oper_not_found
-
- oper_valid:
tokenmode = token["mode"]
tokencat = tokenmode & PGE_OPTABLE_SYNCAT
- ## this hack handles prelist term followed by postcircumfix op
+ ## this hack handles prelist term followed by postcircumfix op
if lastcat != PGE_OPTABLE_PRELIST goto oper_found_1
if tokencat != PGE_OPTABLE_POSTCIRCUMFIX goto oper_found_1
$P0 = pop tokenstack
@@ -398,32 +392,30 @@
push termstack, $P0
oper_found_1:
lastcat = tokencat
+ ## the remainder of this section processes according to the
+ ## table at the end of this function
if tokencat == PGE_OPTABLE_TERM goto term_shift
if tokencat == PGE_OPTABLE_PREFIX goto oper_shift # (S1)
if tokencat == PGE_OPTABLE_CIRCUMFIX goto oper_shift # (S2)
- ## Check that we already have a term
$I0 = elements termstack
if $I0 > 0 goto shift_reduce
if tokencat != PGE_OPTABLE_PRELIST goto end
## The shift/reduce loop
shift_reduce:
- ## If the token stack is empty, shift
$I0 = elements tokenstack
if $I0 > 0 goto shift_reduce_1
if tokencat == PGE_OPTABLE_CLOSE goto end # (E3)
topcat = PGE_OPTABLE_EMPTY
goto oper_shift # (S3)
shift_reduce_1:
- ## Compare with token at top of stack
top = tokenstack[-1]
topmode = top["mode"]
topcat = topmode & PGE_OPTABLE_SYNCAT
if topcat == PGE_OPTABLE_POSTFIX goto oper_reduce # (R4)
if tokencat == PGE_OPTABLE_CLOSE goto oper_close # (R5, C5)
if topcat >= PGE_OPTABLE_POSTCIRCUMFIX goto oper_shift # (S6)
- ## Check operator precedence
$P0 = token['precedence']
$P1 = top['prec_close']
if $P0 > $P1 goto oper_shift # (P)
@@ -440,17 +432,19 @@
goto shift_reduce
oper_close:
+ ## if the top operator isn't a circumfix, reduce it
+ ## if the close token doesn't match circumfix close, end here
+ ## else shift (fall-through)
if topcat < PGE_OPTABLE_TERNARY goto oper_reduce # (R5)
- $S0 = top["keyclose"]
- if key != $S0 goto end # (C5)
+ $S0 = top['keyclose']
+ if key != $S0 goto end
oper_shift:
+ ## shift operator onto the operator stack
push tokenstack, token
push operstack, oper
- if tokencat < PGE_OPTABLE_POSTCIRCUMFIX goto oper_shift_1
- inc circumnest
- oper_shift_1:
pos = oper.to()
+ ## choose next expect state based on current state
if tokencat == PGE_OPTABLE_PRELIST goto expect_termpost
if tokencat >= PGE_OPTABLE_PREFIX goto expect_term
if tokencat == PGE_OPTABLE_POSTFIX goto expect_oper
@@ -465,13 +459,24 @@
## reduce top operation on stack
reduce:
$P0 = pop tokenstack
+ $P1 = pop operstack
topmode = $P0["mode"]
topcat = topmode & PGE_OPTABLE_SYNCAT
- if topcat != PGE_OPTABLE_CLOSE goto reduce_1
+ if topcat == PGE_OPTABLE_CLOSE goto reduce_close
+ if topcat < PGE_OPTABLE_POSTCIRCUMFIX goto reduce_normal
+ ## we have an unbalanced open, so error. remove the
+ ## incomplete circumfixed term, and for circumfix: opers
+ ## put a failed nullterm onto the termstack
+ wspos = -1
+ $P0 = pop termstack
+ if topcat != PGE_OPTABLE_CIRCUMFIX goto reduce_end
+ (oper, $S0, $P0, $P1) = newfrom(mob, pos, "PGE::Match")
+ goto reduce_end
+ reduce_close:
$P0 = pop tokenstack
$P1 = pop operstack
- reduce_1:
- $P1 = pop operstack
+ reduce_normal:
+ .local int arity
topmode = $P0["mode"]
arity = topmode & PGE_OPTABLE_ARITY
reduce_args:
@@ -482,11 +487,12 @@
$P1[arity] = $P2
goto reduce_args
reduce_backtrack:
- wspos = $P1.from()
- if arity > 0 goto reduce_end
+ wspos = -1
+ if arity > 0 goto end
push termstack, $P2
goto reduce_end
reduce_list:
+ ## combine matching list associative operations
$I0 = topmode & PGE_OPTABLE_ASSOC
if $I0 != PGE_OPTABLE_ASSOC_LIST goto reduce_saveterm
$S1 = $P1['type']
@@ -499,9 +505,6 @@
reduce_saveterm:
push termstack, $P1
reduce_end:
- if topcat < PGE_OPTABLE_POSTCIRCUMFIX goto reduce_end_1
- dec circumnest
- reduce_end_1:
ret
token_match:
@@ -537,15 +540,29 @@
bsr reduce
goto end
end_1:
+ mpos = -1
+ ## if the termstack is empty, fail the match
+ ## if the term is an invalid term, fail the match
$I0 = elements termstack
if $I0 < 1 goto end_2
$P0 = pop termstack
unless $P0 goto end_2
mob["expr"] = $P0
mpos = wspos
- .return (mob)
+ if wspos > 0 goto end_2
+ ## somewhere we encountered an error that caused us to backtrack
+ ## find the "real" ending position here
+ end_1a:
+ $I0 = $P0.to()
+ if $I0 <= wspos goto end_1b
+ wspos = $I0
+ mpos = $I0
+ end_1b:
+ $P0 = $P0[0]
+ if null $P0 goto end_2
+ $I0 = isa $P0, 'PGE::Match'
+ if $I0 goto end_1a
end_2:
- mpos = -1
.return (mob)
err_ternary:
Modified: branches/pge-pm/t/compilers/pge/03-optable.t
==============================================================================
--- branches/pge-pm/t/compilers/pge/03-optable.t (original)
+++ branches/pge-pm/t/compilers/pge/03-optable.t Fri Apr 21 10:22:18 2006
@@ -53,10 +53,8 @@
'list associativity');
optable_output_is('a b', 'term:a (pos=1)', 'two terms in sequence');
-optable_output_is('a = = b', 'term:a (pos=1)', 'two opers in sequence',
- todo => 'fix end position');
-optable_output_is('a +', 'term:a (pos=1)', 'infix missing rhs',
- todo => 'fix end position');
+optable_output_is('a = = b', 'term:a (pos=1)', 'two opers in sequence');
+optable_output_is('a +', 'term:a (pos=1)', 'infix missing rhs');
optable_output_is('a++', 'postfix:++(term:a)', 'postfix');
optable_output_is('a--', 'postfix:--(term:a)', 'postfix');
@@ -70,10 +68,8 @@
'infix:+(infix:*(term:a, term:b), term:c) (pos=5)',
'extra close paren');
optable_output_is(' )a*b+c)+4', 'failed', 'only close paren');
-optable_output_is('(a*b+c', 'failed', 'missing close paren',
- todo => 'fix close tokens');
-optable_output_is('(a*b+c]', 'failed', 'mismatch close paren',
- todo => 'fix close tokens');
+optable_output_is('(a*b+c', 'failed', 'missing close paren');
+optable_output_is('(a*b+c]', 'failed', 'mismatch close paren');
optable_output_is('a+++--b',