Hi Adela, This looks great! I'll install it as is. The suggested changes below are for forthcoming commits.
> Le 20 nov. 2020 à 15:40, Adela Vais <[email protected]> a écrit : > > +# b4_symbol_type_define > +# --------------------- > +# Define symbol_type, the external type for symbols used for symbol > +# constructors. > +m4_define([b4_symbol_type_define], > +[[ > + /** > + * A complete symbol > + */ > + struct Symbol > + { > + private SymbolKind kind; > + private ]b4_yystype[ value;]b4_locations_if([[ > + private YYLocation location_;]])[ You should introduce type aliases for b4_yystype and YYLocation. In the C++ parser, you have value_type and location_type which are defined to whatever they are actually. The code is nicer to read, with fewer occurrences of ugly YYnames. > + SymbolKind token() { return kind; } > + ]b4_yystype[ semanticValue() { return value; }]b4_locations_if([[ Make this value instead of semanticValue. > @@ -75,7 +75,7 @@ public interface Lexer > * to the next token and prepares to return the semantic value > * ]b4_locations_if([and beginning/ending positions ])[of the token. > * @@return the token identifier corresponding to the next token. */ > - TokenKind yylex (); > + ]b4_parser_class[.Symbol yylex (); Can't you provide an alias to avoid the need for the full path? > @@ -411,7 +411,9 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[) > yycdebugln (message); > } > } > -]])[ > +]]) > +b4_symbol_type_define > +[ I would prefer > ]])[ > ]b4_symbol_type_define[ > for consistency. > if (yychar == TokenKind.]b4_symbol(empty, id)[) > {]b4_parse_trace_if([[ > yycdebugln ("Reading a token");]])[ > - yychar = yylex ();]b4_locations_if([[ > - static if (yy_location_is_class) { > - yylloc = new ]b4_location_type[(yylexer.startPos, > yylexer.endPos); > - } else { > - yylloc = ]b4_location_type[(yylexer.startPos, yylexer.endPos); > - }]]) > - yylval = yylexer.semanticVal;[ > + Symbol yysymbol = yylex(); > + yychar = yysymbol.token(); Maybe you don't need yychar, but only need yytoken. You probably can avoid dealing with the token-kinds here, and deal only with the symbol kinds. > + yylval = yysymbol.semanticValue();]b4_locations_if([[ > + yylloc = yysymbol.location();]])[ > } > > /* Convert token to internal form. */ > +@deftypemethod {Lexer} {YYParser.Symbol} yylex() > +Return the next token. The return value is of type YYParser.Symbol, Use @code{YYParser.Symbol}. > which > +binds together the TokenKind, the semantic value and the location. @code for TokenKind too. > @end deftypemethod > > @deftypemethod {Lexer} {YYPosition} getStartPos() > diff --git a/examples/d/calc/calc.y b/examples/d/calc/calc.y > index bee3fc94..3f41f048 100644 > --- a/examples/d/calc/calc.y > +++ b/examples/d/calc/calc.y > @@ -114,7 +114,7 @@ if (isInputRange!R && is(ElementType!R : dchar)) > return semanticVal_; > } > > - TokenKind yylex() > + Calc.Symbol yylex() > { > import std.uni : isWhite, isNumber; > > @@ -127,7 +127,7 @@ if (isInputRange!R && is(ElementType!R : dchar)) > } > > if (input.empty) > - return TokenKind.YYEOF; > + return Calc.Symbol(TokenKind.YYEOF, new YYLocation(startPos, endPos)); > > // Numbers. > if (input.front.isNumber) > @@ -143,7 +143,7 @@ if (isInputRange!R && is(ElementType!R : dchar)) > } > start = end; > end.column += lenChars; > - return TokenKind.NUM; > + return Calc.Symbol(TokenKind.NUM, semanticVal_.ival, new > YYLocation(startPos, endPos)); > } > > // Individual characters > @@ -153,17 +153,17 @@ if (isInputRange!R && is(ElementType!R : dchar)) > end.column++; > switch (ch) > { > - case '+': return TokenKind.PLUS; > - case '-': return TokenKind.MINUS; > - case '*': return TokenKind.STAR; > - case '/': return TokenKind.SLASH; > - case '(': return TokenKind.LPAR; > - case ')': return TokenKind.RPAR; > + case '+': return Calc.Symbol(TokenKind.PLUS, new YYLocation(startPos, > endPos)); > + case '-': return Calc.Symbol(TokenKind.MINUS, new > YYLocation(startPos, endPos)); > + case '*': return Calc.Symbol(TokenKind.STAR, new YYLocation(startPos, > endPos)); > + case '/': return Calc.Symbol(TokenKind.SLASH, new > YYLocation(startPos, endPos)); > + case '(': return Calc.Symbol(TokenKind.LPAR, new YYLocation(startPos, > endPos)); > + case ')': return Calc.Symbol(TokenKind.RPAR, new YYLocation(startPos, > endPos)); This is super verbose. Can't you factor some 'loc' variable and use it? > @@ -133,13 +132,13 @@ if (isInputRange!R && is(ElementType!R : dchar)) > input.popFront; > switch (ch) > { > - case '+': return TokenKind.PLUS; > - case '-': return TokenKind.MINUS; > - case '*': return TokenKind.STAR; > - case '/': return TokenKind.SLASH; > - case '(': return TokenKind.LPAR; > - case ')': return TokenKind.RPAR; > - case '\n': return TokenKind.EOL; > + case '+': return Calc.Symbol(TokenKind.PLUS); > + case '-': return Calc.Symbol(TokenKind.MINUS); > + case '*': return Calc.Symbol(TokenKind.STAR); > + case '/': return Calc.Symbol(TokenKind.SLASH); > + case '(': return Calc.Symbol(TokenKind.LPAR); > + case ')': return Calc.Symbol(TokenKind.RPAR); > + case '\n': return Calc.Symbol(TokenKind.EOL); In modern C++ there's a feature I like: the constructor can be called implicitly. So for instance MyStruct foo() { return 42; } actually means MyStruct foo() { return MyStruct(42); } If D has something equivalent, you can probably simplify all this. > switch (c) > { > - case '+': return TokenKind.]AT_TOKEN_PREFIX[PLUS; > - case '-': return TokenKind.]AT_TOKEN_PREFIX[MINUS; > - case '*': return TokenKind.]AT_TOKEN_PREFIX[STAR; > - case '/': return TokenKind.]AT_TOKEN_PREFIX[SLASH; > - case '(': return TokenKind.]AT_TOKEN_PREFIX[LPAR; > - case ')': return TokenKind.]AT_TOKEN_PREFIX[RPAR; > - case '\n': return TokenKind.]AT_TOKEN_PREFIX[EOL; > - case '=': return TokenKind.]AT_TOKEN_PREFIX[EQUAL; > - case '^': return TokenKind.]AT_TOKEN_PREFIX[POW; > - case '!': return TokenKind.]AT_TOKEN_PREFIX[NOT; > - default: return TokenKind.]AT_TOKEN_PREFIX[YYUNDEF; > + case '+': return > YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[PLUS]AT_LOCATION_IF([[, new > YYLocation(startPos, endPos)]])[); > + case '-': return > YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[MINUS]AT_LOCATION_IF([[, new > YYLocation(startPos, endPos)]])[); > + case '*': return > YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[STAR]AT_LOCATION_IF([[, new > YYLocation(startPos, endPos)]])[); > + case '/': return > YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[SLASH]AT_LOCATION_IF([[, new > YYLocation(startPos, endPos)]]) Same comments about verbosity and redundancy. Great job Adela!
