[PATCH 03/10] d: put YYEMPTY in the TokenKind

Akim Demaille Mon, 13 Apr 2020 08:44:51 -0700

* data/skeletons/d.m4, data/skeletons/lalr1.d (b4_token_enums): Rename
YYTokenType as TokenKind.
Define YYEMPTY.
* examples/d/calc.y, tests/calc.at, tests/scanner.at: Adjust.
---
 TODO                        |  7 ++++++-
 data/skeletons/README-D.txt |  4 ++--
 data/skeletons/d.m4         |  8 +++-----
 data/skeletons/lalr1.d      | 15 +++++++--------
 examples/d/calc.y           | 24 +++++++++++-------------
 tests/calc.at               |  4 ++--
 tests/scanner.at            | 16 ++++++++--------
 7 files changed, 39 insertions(+), 39 deletions(-)


diff --git a/TODO b/TODO
index e32f6ec6..a1dec320 100644
--- a/TODO
+++ b/TODO
@@ -120,6 +120,11 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd
 https://research.swtch.com/yyerror
 http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf
 
+* D
+** yylex
+It would be better to have TokenKind as return value.  Can we use reflexion
+to support both output types?
+
 * Modernization
 Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern
 and older C++ compilers.  Currently the code defaults to defining it to
@@ -264,7 +269,7 @@ It would be a very nice source of inspiration for the other languages.
 
 Valentin Tolmer is working on this.
 
-** yychar == yyempty_
+** yychar == YYEMPTY
 The code in yyerrlab reads:
 
       if (yychar <= YYEOF)
diff --git a/data/skeletons/README-D.txt b/data/skeletons/README-D.txt
index 214e3099..e6068b4e 100644
--- a/data/skeletons/README-D.txt
+++ b/data/skeletons/README-D.txt
@@ -27,7 +27,7 @@ public interface Lexer
    * to the next token and prepares to return the semantic value
    * and beginning/ending positions of the token.
    * @return the token identifier corresponding to the next token. */
-  YYTokenType yylex ();
+  TokenKind yylex ();
 
   /**
    * Entry point for error reporting.  Emits an error
@@ -39,7 +39,7 @@ public interface Lexer
    void yyerror (YYLocation loc, string s);
 }
 
-- semantic types are handled by D usions (same as for C/C++ parsers)
+- semantic types are handled by D unions (same as for C/C++ parsers)
 
 - the following (non-standard) %defines are supported:
 
diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4
index 4c3a5ce7..3041ac78 100644
--- a/data/skeletons/d.m4
+++ b/data/skeletons/d.m4
@@ -164,11 +164,9 @@ m4_define([b4_token_enum],
 # Output the definition of the tokens as enums.
 m4_define([b4_token_enums],
 [/* Token kinds.  */
-public enum YYTokenType {
-
-  /** Token returned by the scanner to signal the end of its input.  */
-  EOF = 0,
-b4_symbol_foreach([b4_token_enum])
+public enum TokenKind {
+  ]b4_symbol_kind([-2])[ = -2,
+b4_symbol_foreach([b4_token_enum])dnl
 }
 ])
 
diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d
index 150f1540..04528257 100644
--- a/data/skeletons/lalr1.d
+++ b/data/skeletons/lalr1.d
@@ -429,7 +429,7 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[)
   public bool parse ()
   {
     /// Lookahead and lookahead in internal form.
-    int yychar = yyempty_;
+    int yychar = TokenKind.YYEMPTY;
     SymbolKind yytoken = SymbolKind.]b4_symbol_prefix[YYEMPTY;
 
     /* State.  */
@@ -493,7 +493,7 @@ m4_popdef([b4_at_dollar])])dnl
         }
 
         /* Read a lookahead token.  */
-        if (yychar == yyempty_)
+        if (yychar == TokenKind.YYEMPTY)
         {]b4_parse_trace_if([[
           yycdebugln ("Reading a token");]])[
           yychar = yylex ();]b4_locations_if([[
@@ -532,7 +532,7 @@ m4_popdef([b4_at_dollar])])dnl
           yy_symbol_print ("Shifting", yytoken, yylval]b4_locations_if([, yylloc])[);]])[
 
           /* Discard the token being shifted.  */
-          yychar = yyempty_;
+          yychar = TokenKind.YYEMPTY;
 
           /* Count tokens shifted since error; after three, turn off error
            * status.  */
@@ -573,7 +573,7 @@ m4_popdef([b4_at_dollar])])dnl
         if (yyerrstatus_ == 0)
         {
           ++yynerrs_;
-          if (yychar == yyempty_)
+          if (yychar == TokenKind.YYEMPTY)
             yytoken = SymbolKind.]b4_symbol_prefix[YYEMPTY;
           yyerror (]b4_locations_if([yylloc, ])[yysyntax_error (yystate, yytoken));
         }
@@ -584,14 +584,14 @@ m4_popdef([b4_at_dollar])])dnl
           /* If just tried and failed to reuse lookahead token after an
            * error, discard it.  */
 
-          if (yychar <= YYTokenType.EOF)
+          if (yychar <= TokenKind.]b4_symbol(0, [id])[)
           {
             /* Return failure if at end of input.  */
-            if (yychar == YYTokenType.EOF)
+            if (yychar == TokenKind.]b4_symbol(0, [id])[)
              return false;
           }
           else
-            yychar = yyempty_;
+            yychar = TokenKind.YYEMPTY;
         }
 
         /* Else will try to reuse lookahead token after shifting the error
@@ -841,7 +841,6 @@ m4_popdef([b4_at_dollar])])dnl
 
   private static immutable int yylast_ = ]b4_last[;
   private static immutable int yynnts_ = ]b4_nterms_number[;
-  private static immutable int yyempty_ = -2;
   private static immutable int yyfinal_ = ]b4_final_state_number[;
   private static immutable int yyntokens_ = ]b4_tokens_number[;
 
diff --git a/examples/d/calc.y b/examples/d/calc.y
index 7e0c5f1a..a2ae85df 100644
--- a/examples/d/calc.y
+++ b/examples/d/calc.y
@@ -99,16 +99,15 @@ class CalcLexer(R) : Lexer
     while (!input.empty && input.front != '\n' && isWhite (input.front))
       input.popFront;
 
-    // Handle EOF.
     if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.YYEOF;
 
     // Numbers.
     if (input.front.isNumber)
       {
         import std.conv : parse;
         semanticVal_.ival = input.parse!int;
-        return YYTokenType.NUM;
+        return TokenKind.NUM;
       }
 
     // Individual characters
@@ -116,16 +115,15 @@ class CalcLexer(R) : Lexer
     input.popFront;
     switch (ch)
       {
-      case EOF: return YYTokenType.EOF;
-      case '=': return YYTokenType.EQ;
-      case '+': return YYTokenType.PLUS;
-      case '-': return YYTokenType.MINUS;
-      case '*': return YYTokenType.STAR;
-      case '/': return YYTokenType.SLASH;
-      case '(': return YYTokenType.LPAR;
-      case ')': return YYTokenType.RPAR;
-      case '\n': return YYTokenType.EOL;
-      default:  assert(0);
+      case '=':  return TokenKind.EQ;
+      case '+':  return TokenKind.PLUS;
+      case '-':  return TokenKind.MINUS;
+      case '*':  return TokenKind.STAR;
+      case '/':  return TokenKind.SLASH;
+      case '(':  return TokenKind.LPAR;
+      case ')':  return TokenKind.RPAR;
+      case '\n': return TokenKind.EOL;
+      default: assert(0);
       }
   }
 }
diff --git a/tests/calc.at b/tests/calc.at
index 3a2830eb..1cdbb9af 100644
--- a/tests/calc.at
+++ b/tests/calc.at
@@ -306,13 +306,13 @@ class CalcLexer(R) : Lexer
 
     // Handle EOF.
     if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.CALC_EOF;
 
     // Numbers.
     if (input.front.isNumber)
       {
         semanticVal_.ival = parseInt;
-        return YYTokenType.NUM;
+        return TokenKind.NUM;
       }
 
     // Individual characters
diff --git a/tests/scanner.at b/tests/scanner.at
index bc05538a..9ce13300 100644
--- a/tests/scanner.at
+++ b/tests/scanner.at
@@ -126,7 +126,7 @@ class YYLexer(R) : Lexer
     import std.uni : isNumber;
     // Handle EOF.
     if (input.empty)
-      return YYTokenType.EOF;
+      return TokenKind.END;
 
     auto c = input.front;
     input.popFront;
@@ -136,13 +136,13 @@ class YYLexer(R) : Lexer
     {
     case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
       semanticVal_.val = c - '0';
-      return YYTokenType.NUM;
-    case '+': return YYTokenType.PLUS;
-    case '-': return YYTokenType.MINUS;
-    case '*': return YYTokenType.STAR;
-    case '/': return YYTokenType.SLASH;
-    case '(': return YYTokenType.LPAR;
-    case ')': return YYTokenType.RPAR;
+      return TokenKind.NUM;
+    case '+': return TokenKind.PLUS;
+    case '-': return TokenKind.MINUS;
+    case '*': return TokenKind.STAR;
+    case '/': return TokenKind.SLASH;
+    case '(': return TokenKind.LPAR;
+    case ')': return TokenKind.RPAR;
     default: assert(0);
     }
   }
-- 
2.26.0

[PATCH 03/10] d: put YYEMPTY in the TokenKind

Reply via email to