Attached is a modified version of p9p yacc that
supports the Go grammar. I'll be sending a
version of Plan 9 yacc later today.
The following is a description of the changes.
1. The %error-verbose directive is ignored.
2. A description of the final grammar is
printed before the state descriptions
in y.output.
3. The 'x' format for character literals is
now used instead of prefixing with a space.
4. The YYEMPTY define is now used to clear
the lookahead token (instead of an explicit
negative one).
5. Make yychar and yystate globals so they
can be inspected by external code.
6. Support C++ style // comments in actions.
7. Add a usage message.
8. Replace a few unbounded uses of sprint and strcpy with snprint and strecpy.
I've also sent out a changeset to the Go
development list which adds support for
using Plan 9 yacc to generate the special
errors.
One tiny nit is that Plan 9 uses the name
yytoknames for debugging where Bison uses
yytname. I've just used sed for this.
Any questions?
Anthony
diff -r 44a7194d00cf lib/yaccpar
--- a/lib/yaccpar Sat Nov 12 11:52:10 2011 -0800
+++ b/lib/yaccpar Tue Nov 15 13:10:13 2011 -0800
@@ -2,7 +2,7 @@
#define YYERROR goto yyerrlab
#define YYACCEPT return(0)
#define YYABORT return(1)
-#define yyclearin yychar = -1
+#define yyclearin yychar = YYEMPTY
#define yyerrok yyerrflag = 0
#ifdef yydebug
@@ -51,6 +51,8 @@
return x;
}
+long yychar;
+
static long
#ifdef YYARG
yylex1(struct Yyarg *yyarg)
@@ -58,7 +60,6 @@
yylex1(void)
#endif
{
- long yychar;
const long *t3p;
int c;
@@ -68,6 +69,7 @@
yychar = yylex();
#endif
if(yychar <= 0) {
+ yychar = 0;
c = yytok1[0];
goto out;
}
@@ -99,6 +101,8 @@
return c;
}
+int yystate;
+
int
#ifdef YYARG
yyparse(struct Yyarg *yyarg)
@@ -112,8 +116,8 @@
int yys;
} yys[YYMAXDEPTH], *yyp, *yypt;
const short *yyxi;
- int yyj, yym, yystate, yyn, yyg;
- long yychar;
+ int yyj, yym, yyn, yyg;
+ long yyc;
#ifndef YYARG
YYSTYPE save1, save2;
int save3, save4;
@@ -125,7 +129,8 @@
#endif
yystate = 0;
- yychar = -1;
+ yychar = YYEMPTY;
+ yyc = YYEMPTY;
yynerrs = 0;
yyerrflag = 0;
yyp = &yys[-1];
@@ -151,7 +156,7 @@
yystack:
/* put a state and value onto the stack */
if(yydebug >= 4)
- fprint(2, "char %s in %s", yytokname(yychar), yystatname(yystate));
+ fprint(2, "char %s in %s", yytokname(yyc), yystatname(yystate));
yyp++;
if(yyp >= &yys[YYMAXDEPTH]) {
@@ -165,18 +170,19 @@
yyn = yypact[yystate];
if(yyn <= YYFLAG)
goto yydefault; /* simple state */
- if(yychar < 0)
+ if(yyc < 0)
#ifdef YYARG
- yychar = yylex1(yyarg);
+ yyc = yylex1(yyarg);
#else
- yychar = yylex1();
+ yyc = yylex1();
#endif
- yyn += yychar;
+ yyn += yyc;
if(yyn < 0 || yyn >= YYLAST)
goto yydefault;
yyn = yyact[yyn];
- if(yychk[yyn] == yychar) { /* valid shift */
- yychar = -1;
+ if(yychk[yyn] == yyc) { /* valid shift */
+ yyc = YYEMPTY;
+ yychar = YYEMPTY;
yyval = yylval;
yystate = yyn;
if(yyerrflag > 0)
@@ -188,11 +194,11 @@
/* default state action */
yyn = yydef[yystate];
if(yyn == -2) {
- if(yychar < 0)
+ if(yyc < 0)
#ifdef YYARG
- yychar = yylex1(yyarg);
+ yyc = yylex1(yyarg);
#else
- yychar = yylex1();
+ yyc = yylex1();
#endif
/* look through exception table */
@@ -201,21 +207,24 @@
break;
for(yyxi += 2;; yyxi += 2) {
yyn = yyxi[0];
- if(yyn < 0 || yyn == yychar)
+ if(yyn < 0 || yyn == yyc)
break;
}
yyn = yyxi[1];
- if(yyn < 0)
+ if(yyn < 0) {
+ yyc = YYEMPTY;
+ yychar = YYEMPTY;
goto ret0;
+ }
}
if(yyn == 0) {
/* error ... attempt to resume parsing */
switch(yyerrflag) {
case 0: /* brand new error */
yyerror("syntax error");
- if(yydebug >= 1) {
+ if(yydebug >= 2) {
fprint(2, "%s", yystatname(yystate));
- fprint(2, "saw %s\n", yytokname(yychar));
+ fprint(2, "saw %s\n", yytokname(yyc));
}
goto yyerrlab;
yyerrlab:
@@ -245,10 +254,11 @@
case 3: /* no shift yet; clobber input char */
if(yydebug >= 2)
- fprint(2, "error recovery discards %s\n", yytokname(yychar));
- if(yychar == YYEOFCODE)
+ fprint(2, "error recovery discards %s\n", yytokname(yyc));
+ if(yyc == YYEOFCODE)
goto ret1;
- yychar = -1;
+ yyc = YYEMPTY;
+ yychar = YYEMPTY;
goto yynewstate; /* try again in the same state */
}
}
diff -r 44a7194d00cf src/cmd/yacc.c
--- a/src/cmd/yacc.c Sat Nov 12 11:52:10 2011 -0800
+++ b/src/cmd/yacc.c Tue Nov 15 13:10:13 2011 -0800
@@ -92,6 +92,7 @@
TYPEDEF,
TYPENAME,
UNION,
+ IGNORE,
ENDFILE = 0,
@@ -319,6 +320,9 @@
"token", TERM,
"type", TYPEDEF,
"union", UNION,
+
+ /* ignored bison directives */
+ "error-verbose", IGNORE,
0,
};
@@ -330,6 +334,7 @@
char* writem(int*);
char* symnam(int);
void summary(void);
+void grammar(void);
void error(char*, ...);
void aryfil(int*, int, int);
int setunion(int*, int*);
@@ -388,6 +393,7 @@
cempty(); /* make a table of which nonterminals can match the empty string */
cpfir(); /* make a table of firsts of nonterminals */
stagen(); /* generate the states */
+ grammar();
output(); /* write the states and the tables */
go2out();
hideprod();
@@ -531,14 +537,14 @@
;
p = prdptr[-*p];
q = chcopy(sarr, nontrst[*p-NTBASE].name);
- q = chcopy(q, ": ");
+ q = chcopy(q, ":");
for(;;) {
*q = ' ';
p++;
- if(p == pp)
- *q = '.';
q++;
*q = '\0';
+ if(p == pp)
+ q = chcopy(q, ". ");
i = *p;
if(i <= 0)
break;
@@ -550,7 +556,7 @@
/* an item calling for a reduction */
i = *pp;
if(i < 0 ) {
- q = chcopy(q, " (");
+ q = chcopy(q, " (");
sprint(q, "%d)", -i);
}
return sarr;
@@ -562,12 +568,41 @@
char*
symnam(int i)
{
- char* cp;
+ return (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
+}
- cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
- if(*cp == ' ')
- cp++;
- return cp;
+/*
+ * output the grammar rules on y.output
+ */
+void
+grammar(void)
+{
+ int i, j, n, prev;
+ int *p;
+
+ if(foutput == 0)
+ return;
+
+ Bprint(foutput, "\nGrammar\n");
+ prev = 0;
+ n = 0;
+ PLOOP(0, i) {
+ p = prdptr[i];
+ j = 0;
+ if(p[0] == prev)
+ Bprint(foutput, "\t%d \t|", n++);
+ else {
+ Bprint(foutput, "\n\t%d %s:", n++, symnam(p[0]));
+ if(p[1] <= 0)
+ Bprint(foutput, " /* empty */");
+ }
+ for(j = 1; p[j] > 0; j++) {
+ Bprint(foutput, " %s", symnam(p[j]));
+ }
+ Bputc(foutput, '\n');
+ prev = p[0];
+ }
+ Bprint(foutput, "\n");
}
/*
@@ -1173,10 +1208,17 @@
}
void
+usage(void)
+{
+ fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
+ exits("usage");
+}
+
+void
setup(int argc, char *argv[])
{
long c, t;
- int i, j, fd, lev, ty, ytab, *p;
+ int i, j, lev, ty, ytab, *p;
int vflag, dflag, stem;
char actnm[8], *stemc, *s, dirbuf[128];
Biobuf *fout;
@@ -1195,7 +1237,7 @@
vflag++;
break;
case 'D':
- yydebug = ARGF();
+ yydebug = EARGF(usage());
break;
case 'a':
yyarg = 1;
@@ -1208,7 +1250,7 @@
break;
case 'o':
ytab++;
- ytabc = ARGF();
+ ytabc = EARGF(usage());
break;
case 's':
stem++;
@@ -1221,18 +1263,11 @@
error("illegal option: %c", ARGC());
}ARGEND
openup(stemc, dflag, vflag, ytab, ytabc);
- fout = dflag?fdefine:ftable;
- if(yyarg){
+ if(yyarg)
Bprint(ftable, "#define\tYYARG\t1\n\n");
- }
- if((fd = mkstemp(ttempname)) >= 0){
- tempname = ttempname;
- ftemp = Bfdopen(fd, OWRITE);
- }
- if((fd = mkstemp(tactname)) >= 0){
- actname = tactname;
- faction = Bfdopen(fd, OWRITE);
- }
+
+ ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
+ faction = Bopen(actname = mktemp(tactname), OWRITE);
if(ftemp == 0 || faction == 0)
error("cannot open temp file");
if(argc < 1)
@@ -1375,6 +1410,10 @@
t = gettok();
continue;
+ case IGNORE:
+ t = gettok();
+ continue;
+
default:
error("syntax error");
}
@@ -1396,8 +1435,11 @@
Bprint(ftable, "YYSTYPE yylval;\n");
Bprint(ftable, "YYSTYPE yyval;\n");
}else{
- if(dflag)
+ fout = ftable;
+ if(dflag){
+ fout = fdefine;
Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
+ }
Bprint(fout, "struct Yyarg {\n");
Bprint(fout, "\tint\tyynerrs;\n");
Bprint(fout, "\tint\tyyerrflag;\n");
@@ -1406,6 +1448,7 @@
Bprint(fout, "\tYYSTYPE\tyylval;\n");
Bprint(fout, "};\n\n");
}
+
prdptr[0] = mem;
/* added production */
@@ -1552,6 +1595,7 @@
Bterm(faction);
Bprint(ftable, "#define YYEOFCODE %d\n", 1);
Bprint(ftable, "#define YYERRCODE %d\n", 2);
+ Bprint(ftable, "#define YYEMPTY (%d)\n", -2);
}
/*
@@ -1581,17 +1625,17 @@
/* establish value for token */
/* single character literal */
- if(s[0] == ' ') {
+ if(s[0] == '\'') {
val = chartorune(&rune, &s[1]);
- if(s[val+1] == 0) {
+ if(s[val+1] == '\'') {
val = rune;
goto out;
}
}
/* escape sequence */
- if(s[0] == ' ' && s[1] == '\\') {
- if(s[3] == 0) {
+ if(s[0] == '\'' && s[1] == '\\') {
+ if(s[3] == '\'') {
/* single character escape sequence */
switch(s[2]) {
case 'n': val = '\n'; break;
@@ -1625,6 +1669,7 @@
val = extval++;
out:
+ //print("%s = %d\n", s, val);
tokset[ntokens].value = val;
toklev[ntokens] = 0;
return ntokens;
@@ -1642,7 +1687,7 @@
for(i=ndefout; i<=ntokens; i++) {
/* non-literals */
c = tokset[i].name[0];
- if(c != ' ' && c != '$') {
+ if(c != '\'' && c != '$') {
Bprint(ftable, "#define %s %d\n",
tokset[i].name, tokset[i].value);
if(fdefine)
@@ -1737,7 +1782,7 @@
case '"':
case '\'':
match = c;
- tokname[0] = ' ';
+ tokname[0] = '\'';
i = 1;
for(;;) {
c = Bgetrune(finput);
@@ -1756,6 +1801,8 @@
if(i < NAMESIZE)
i += c;
}
+ tokname[i] = '\'';
+ i++;
break;
case '%':
@@ -1847,7 +1894,7 @@
{
int i;
- if(s[0] == ' ')
+ if(s[0] == '\'')
t = 0;
TLOOP(i)
if(!strcmp(s, tokset[i].name))
@@ -1915,6 +1962,7 @@
void
cpycode(void)
{
+
long c;
c = Bgetrune(finput);
@@ -1956,17 +2004,22 @@
/* i is the number of lines skipped */
i = 0;
- if(Bgetrune(finput) != '*')
+ c = Bgetrune(finput);
+ if(c == '/'){ /* C++ //: skip to end of line */
+ while((c = Bgetrune(finput)) != Beof)
+ if(c == '\n')
+ return 1;
+ }else if(c == '*'){ /* normal C comment */
+ while((c = Bgetrune(finput)) != Beof) {
+ while(c == '*')
+ if((c = Bgetrune(finput)) == '/')
+ return i;
+ if(c == '\n')
+ i++;
+ }
+ }else
error("illegal comment");
- c = Bgetrune(finput);
- while(c != Beof) {
- while(c == '*')
- if((c=Bgetrune(finput)) == '/')
- return i;
- if(c == '\n')
- i++;
- c = Bgetrune(finput);
- }
+
error("EOF inside comment");
return 0;
}
@@ -2094,22 +2147,30 @@
/* look for comments */
Bputrune(faction, c);
c = Bgetrune(finput);
- if(c != '*')
+ switch(c) {
+ case '/': /* C++ style comment: copy runes up to the end of the line */
+ while(c != Beof) {
+ if(c == '\n')
+ goto swt; /* hand the newline back to swt */
+ Bputrune(faction, c);
+ c = Bgetrune(finput);
+ }
+ break; /* EOF before a newline: reaches the error below */
+ case '*': /* C comment: copy runes up to the closing star-slash */
+ Bputrune(faction, c); /* emit the opening '*' up front so it cannot pair with a following '/' */
+ while((c = Bgetrune(finput)) != Beof) { /* read from the grammar input, not the action output */
+ while(c == '*') {
+ Bputrune(faction, c);
+ if((c = Bgetrune(finput)) == '/')
+ goto lcopy;
+ }
+ Bputrune(faction, c);
+ if(c == '\n')
+ lineno++;
+ }
+ break; /* EOF inside the comment: reaches the error below */
+ default: /* a lone '/': not a comment */
+ goto swt;
-
- /* it really is a comment */
- Bputrune(faction, c);
- c = Bgetrune(finput);
- while(c >= 0) {
- while(c == '*') {
- Bputrune(faction, c);
- if((c=Bgetrune(finput)) == '/')
- goto lcopy;
- }
- Bputrune(faction, c);
- if(c == '\n')
- lineno++;
- c = Bgetrune(finput);
}
error("EOF inside comment");
@@ -2158,26 +2219,26 @@
char buf[256];
if(vflag) {
- sprint(buf, "%s.%s", stem, FILEU);
+ snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
foutput = Bopen(buf, OWRITE);
if(foutput == 0)
error("cannot open %s", buf);
}
if(yydebug) {
- sprint(buf, "%s.%s", stem, FILEDEBUG);
+ snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
if((fdebug = Bopen(buf, OWRITE)) == 0)
error("can't open %s", buf);
}
if(dflag) {
- sprint(buf, "%s.%s", stem, FILED);
+ snprint(buf, sizeof buf, "%s.%s", stem, FILED);
fdefine = Bopen(buf, OWRITE);
if(fdefine == 0)
error("can't create %s", buf);
}
if(ytab == 0)
- sprint(buf, "%s.%s", stem, OFILE);
+ snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
else
- strcpy(buf, ytabc);
+ strecpy(buf, buf+sizeof buf, ytabc);
ftable = Bopen(buf, OWRITE);
if(ftable == 0)
error("cannot open table file %s", buf);