Hi all,
Please find attached a patch for ticket 2550: "Lemon parser may accept
input before EOF is seen."
I'm sending it to the list because I do not seem to be able to pass the
abbreviated Turing test that guards attaching files to the ticket. :(
It appears to work here, where several Lemon-generated parsers are
used in a context with heavy regression tests.
This patch is hereby released to the public.
Enjoy. Vincent.
Index: lemon.c
===================================================================
--- lemon.c (revision 3121)
+++ lemon.c (working copy)
@@ -42,6 +42,7 @@
/********** From the file "build.h" ************************************/
void FindRulePrecedences();
void FindFirstSets();
+void FindStartSymbol();
void FindStates();
void FindLinks();
void FindFollowSets();
@@ -244,6 +245,7 @@
char *tokentype; /* Type of terminal symbols in the parser stack */
char *vartype; /* The default type of non-terminal symbols */
char *start; /* Name of the start symbol for the grammar */
+ struct symbol *startsym; /* Start symbol for the grammar */
char *stacksize; /* Size of the parser stack */
char *include; /* Code to put at the start of the C file */
int includeln; /* Line number for start of include code */
@@ -660,19 +662,12 @@
return;
}
-/* Compute all LR(0) states for the grammar. Links
-** are added to between some states so that the LR(1) follow sets
-** can be computed later.
-*/
-PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */
-void FindStates(lemp)
+void FindStartSymbol(lemp)
struct lemon *lemp;
{
struct symbol *sp;
struct rule *rp;
- Configlist_init();
-
/* Find the start symbol */
if( lemp->start ){
sp = Symbol_find(lemp->start);
@@ -704,10 +699,23 @@
}
}
+ lemp->startsym = sp;
+}
+
+/* Compute all LR(0) states for the grammar. Links
+** are added between some states so that the LR(1) follow sets
+** can be computed later.
+*/
+PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */
+void FindStates(lemp)
+struct lemon *lemp;
+{
+ struct rule *rp;
+
/* The basis configuration set for the first state
** is all rules which have the start symbol as their
** left-hand side */
- for(rp=sp->rule; rp; rp=rp->nextlhs){
+ for(rp=lemp->startsym->rule; rp; rp=rp->nextlhs){
struct config *newcfp;
newcfp = Configlist_addbasis(rp,0);
SetAdd(newcfp->fws,0);
@@ -921,7 +929,6 @@
int i,j;
struct config *cfp;
struct state *stp;
- struct symbol *sp;
struct rule *rp;
/* Add all of the reduce actions
@@ -944,16 +951,10 @@
}
/* Add the accepting token */
- if( lemp->start ){
- sp = Symbol_find(lemp->start);
- if( sp==0 ) sp = lemp->rule->lhs;
- }else{
- sp = lemp->rule->lhs;
- }
/* Add to the first state (which is always the starting state of the
** finite state machine) an action to ACCEPT if the lookahead is the
** start nonterminal. */
- Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
+ Action_add(&lemp->sorted[0]->ap,ACCEPT,lemp->startsym,0);
/* Resolve conflicts */
for(i=0; i<lemp->nstate; i++){
@@ -1461,6 +1462,11 @@
** nonterminal */
FindFirstSets(&lem);
+ Configlist_init();
+
+ /* Determine the start symbol */
+ FindStartSymbol(&lem);
+
/* Compute all LR(0) states. Also record follow-set propagation
** links so that the follow-set can be computed later */
lem.nstate = 0;
@@ -3941,6 +3947,7 @@
}
if( ap->type!=REDUCE ) continue;
rp = ap->x.rp;
+ if (rp->lhs == lemp->startsym) continue; /* do not default a start rule to ensure reduction only on EOF */
if( rp==rbest ) continue;
n = 1;
for(ap2=ap->next; ap2; ap2=ap2->next){
-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------