On Thu, Feb 28, 2019 at 10:19:02PM +0100, Akim Demaille wrote:
> Hi HS!
>
> > Le 28 févr. 2019 à 07:32, H. S. Teoh <[email protected]> a écrit :
[...]
> > ungetc is a truly nasty hack of an API in C; is it really necessary
> > to support that?
>
> Certainly not! I'm merely trying to get a scanner to feed our
> parsers with tokens, and mimicking the C API is all my knowledge
> of D allows me to do...
OK, good. :-)
[...]
> > What I'd do is to templatize CalcLexer on an arbitrary input range
> > of chars, and leave the specifics of binding to a File (or whatever
> > else, like a string in a unittest) to the caller. And I wouldn't
> > bother with using class inheritance at all, since I can't envision
> > we'd ever need to swap in multiple lexers to the same parser. So
> > something like this:
>
> This is already quite advanced compared to the current state of the
> D skeleton. What I'm looking for is a simple scanner that works with
> what we have. For instance below I tried to import your suggestion
> into examples/d/calc.y, but it does not work. Could you help me
> make it work? Just put it in examples/d/ and run "make check".
[...]
Try the attached version of calc.y instead.
Eventually I think we should move away from the Lexer class, and have
the generated parser accept any type that has the required interface
(yylex(), semanticVal(), etc.). In the ideal case, the lexer would
simply be a struct that wraps around an arbitrary input range of
characters. To make things work with the current code, though, I settled
for making CalcLexer a class that implements Lexer. We can change that
when we get around to moving away from lexer classes.
T
--
Many open minds should be closed for repairs. -- K5 user
%language "D"
%define api.parser.class {Calc}
%define parse.error verbose
%code imports {
import std.ascii;
import std.stdio;
}
%union {
int ival;
}
/* Bison Declarations */
/* Untyped tokens.  Each quoted string is an alias for the token name on
   its left; the grammar rules below refer to the operators through these
   aliases. */
%token EQ "="
PLUS "+"
MINUS "-"
STAR "*"
SLASH "/"
LPAR "("
RPAR ")"
EOL "end of line"
/* Symbols whose semantic value is stored in the `ival` member of %union. */
%token <ival> NUM "number"
%type <ival> exp
/* Precedence declarations, from lowest to highest.  The binary operators
   are left-associative; UNARY is a precedence-only name used via %prec. */
%left "-" "+"
%left "*" "/"
%precedence UNARY /* unary operators */
/* Grammar follows */
%%
/* Start symbol: a sequence of one or more lines (left-recursive). */
input:
line
| input line
;
/* A single line of input:
     - a bare end of line (nothing to evaluate),
     - an expression followed by end of line, whose value is printed,
     - the `error` token followed by end of line, with no action
       (Bison's error-recovery rule form). */
line:
EOL
| exp EOL { writeln ($exp); }
| error EOL
;
/* Integer expressions.  Binary operator precedence and associativity come
   from the %left declarations; both unary operators take the UNARY
   precedence through %prec.
   NOTE(review): "/" performs D integer division and does not guard
   against a zero divisor -- confirm whether that should be reported. */
exp:
  NUM                   { $$ = $1; }
| exp "+" exp           { $$ = $1 + $3; }
| exp "-" exp           { $$ = $1 - $3; }
| exp "*" exp           { $$ = $1 * $3; }
| exp "/" exp           { $$ = $1 / $3; }
  /* Unary plus is the identity; it must not negate its operand. */
| "+" exp %prec UNARY   { $$ = $2; }
| "-" exp %prec UNARY   { $$ = -$2; }
| "(" exp ")"           { $$ = $2; }
;
%%
import std.range.primitives;
/**
 * Convenience factory: builds a CalcLexer over `range`, deducing the range
 * type so callers can write `someRange.calcLexer` in a UFCS chain.
 *
 * Params:
 *   range = input range whose elements are implicitly convertible to dchar
 * Returns: a newly allocated CalcLexer!R reading from `range`
 */
auto calcLexer(R)(R range)
  if (isInputRange!R && is(ElementType!R : dchar))
{
  auto lexer = new CalcLexer!R(range);
  return lexer;
}
/**
 * Scanner for the calculator grammar, reading characters from an arbitrary
 * input range whose elements convert to dchar.
 *
 * Implements the Lexer interface expected by the generated parser:
 * yylex() yields the next token, semanticVal() exposes the value attached
 * to the most recent NUM token, and yyerror() reports diagnostics.
 */
class CalcLexer(R) : Lexer
  if (isInputRange!R && is(ElementType!R : dchar))
{
  /// Characters not yet consumed by the scanner.
  R input;

  /// Wraps the range `r`; no input is consumed until yylex() is called.
  this(R r) { input = r; }

  // Should be a local in main, shared with %parse-param.
  /// Becomes 1 as soon as any error has been reported through yyerror().
  int exit_status = 0;

  /// Records the failure in exit_status and prints `s` on stderr.
  public void yyerror (string s)
  {
    exit_status = 1;
    stderr.writeln (s);
  }

  /// Semantic value of the last token returned by yylex() (set for NUM).
  YYSemanticType semanticVal_;

  /// Read accessor for semanticVal_.
  public final @property YYSemanticType semanticVal()
  {
    return semanticVal_;
  }

  /**
   * Scans and returns the next token.
   *
   * Blanks other than '\n' are skipped; '\n' is returned as EOL.  An
   * exhausted input yields EOF.  A run of ASCII digits yields NUM with the
   * parsed value stored in semanticVal_.ival.  Characters that start no
   * token, and numbers that std.conv.parse rejects (e.g. on overflow), are
   * reported through yyerror() and skipped, after which scanning resumes.
   */
  YYTokenType yylex()
  {
    import std.ascii : isDigit;
    import std.conv : ConvException, parse, text;
    import std.uni : isWhite;

    for (;;)
      {
        // Skip blanks.  '\n' is whitespace for isWhite too, but it is a
        // significant token (EOL) in this grammar, so it must not be
        // discarded here.
        while (!input.empty && input.front != '\n' && isWhite (input.front))
          input.popFront;

        // Handle EOF.
        if (input.empty)
          return YYTokenType.EOF;

        // Numbers.  Only ASCII digits are accepted: they are the only
        // characters parse!int can start from.
        if (isDigit (input.front))
          {
            try
              {
                semanticVal_.ival = input.parse!int;
                return YYTokenType.NUM;
              }
            catch (ConvException e)
              {
                yyerror (text ("invalid number: ", e.msg));
                // Drop whatever is left of the offending digit run so the
                // scanner always makes progress.
                while (!input.empty && isDigit (input.front))
                  input.popFront;
                continue;
              }
          }

        // Individual characters.
        auto ch = input.front;
        input.popFront;
        switch (ch)
          {
          case '=': return YYTokenType.EQ;
          case '+': return YYTokenType.PLUS;
          case '-': return YYTokenType.MINUS;
          case '*': return YYTokenType.STAR;
          case '/': return YYTokenType.SLASH;
          case '(': return YYTokenType.LPAR;
          case ')': return YYTokenType.RPAR;
          case '\n': return YYTokenType.EOL;
          default:
            // Unexpected user input is not a program invariant violation:
            // report it and keep scanning instead of aborting.
            yyerror (text ("invalid character: '", ch, "'"));
            continue;
          }
      }
  }
}
/**
 * Entry point: tokenizes standard input with a CalcLexer, runs the
 * generated Calc parser over it, and returns the lexer's exit_status
 * as the process exit code.
 */
int main()
{
  import std.algorithm : map, joiner;
  import std.stdio;
  import std.utf : byDchar;

  // Read in 1024-byte chunks to avoid a syscall round trip per character.
  auto chunks = stdin.byChunk(1024);

  // byChunk yields ubyte[]; view each chunk as char[] and flatten the
  // chunks into a single virtual range of characters.
  auto chars = chunks
    .map!(chunk => cast(char[]) chunk)
    .joiner;

  auto lexer = calcLexer(chars);
  Calc parser = new Calc (lexer);
  parser.parse ();
  return lexer.exit_status;
}