[PATCH] D127357: [pseudo] wip/prototype: use LR0 instead of SLR1 table

Sam McCall via Phabricator via cfe-commits Wed, 08 Jun 2022 15:31:19 -0700

sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.


This ignores the lookahead token when deciding what to reduce, we always reduce
unconditionally. The main effects are:

- all potential partial parses are visible e.g. to error recovery. (In error 
scenarios, the intended reduction may not occur with SLR1 when the error occurs 
at the lookahead token).
- the size of the LR table is much smaller (In this patch we go from 340kB => 
120kB, and the encoding isn't efficient)
- we explore more dead-ends due to lack of lookahead, and parser runs slower
- with this patch, the LR0 table parses ~1/3 slower than the SLR1. (LR0 allows 
code simplifications so we may win some of this back)

This patch uses `eod` as a dummy lookahead token for reduce actions:
i.e. reduces are Actions[State, eod]. The structure of glrParse is preserved:
we still track heads as "pending actions". To allow either LR0 or SLR1 to be
used, we perform lookups for both the actual lookahead token and eod.

In a real implementation we'd probably want to:

- switch glrParse to track head nodes, instead of pending actions on them. This 
should simplify the code there somewhat.
- use a separate storage in LRTable for reduce actions, like a parallel 
vector<RuleID> (sorted by stateid) and vector<size_t> (indexed by stateid)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D127357

Files:
  clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
  clang-tools-extra/pseudo/test/lr-build-basic.test
  clang-tools-extra/pseudo/test/lr-build-conflicts.test
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp

Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -108,7 +108,7 @@
       llvm::outs() << G->dump();
     if (PrintGraph)
       llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G);
-    auto LRTable = clang::pseudo::LRTable::buildSLR(*G);
+    auto LRTable = clang::pseudo::LRTable::buildLR0(*G);
     if (PrintTable)
       llvm::outs() << LRTable.dumpForTests(*G);
     if (PrintStatistics)
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -36,12 +36,10 @@
 # TABLE-NEXT:     'EOF': accept
 # TABLE-NEXT:     '-': shift state 3
 # TABLE-NEXT: State 2
-# TABLE-NEXT:     'EOF': reduce by rule 1 'expr := IDENTIFIER'
-# TABLE-NEXT:     '-': reduce by rule 1 'expr := IDENTIFIER'
+# TABLE-NEXT:     'EOD': reduce by rule 1 'expr := IDENTIFIER'
 # TABLE-NEXT: State 3
 # TABLE-NEXT:     'IDENTIFIER': shift state 2
 # TABLE-NEXT:     'expr': go to state 4
 # TABLE-NEXT: State 4
-# TABLE-NEXT:     'EOF': reduce by rule 0 'expr := expr - expr'
+# TABLE-NEXT:     'EOD': reduce by rule 0 'expr := expr - expr'
 # TABLE-NEXT:     '-': shift state 3
-# TABLE-NEXT:     '-': reduce by rule 0 'expr := expr - expr'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -24,6 +24,6 @@
 # TABLE-NEXT: State 1
 # TABLE-NEXT:     'EOF': accept
 # TABLE-NEXT: State 2
-# TABLE-NEXT:     'EOF': reduce by rule 1 'expr := id'
+# TABLE-NEXT:     'EOD': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
-# TABLE-NEXT:     'EOF': reduce by rule 0 'id := IDENTIFIER'
+# TABLE-NEXT:     'EOD': reduce by rule 0 'id := IDENTIFIER'
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -135,5 +135,31 @@
   return std::move(Build).build(G.table(), Graph.states().size());
 }
 
+LRTable LRTable::buildLR0(const Grammar &G) {
+  auto Graph = LRGraph::buildLR0(G);
+  Builder Build(Graph.startStates());
+  for (const auto &T : Graph.edges()) {
+    Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
+    Build.insert({T.Src, T.Label, Act});
+  }
+  assert(Graph.states().size() <= (1 << StateBits) &&
+         "Graph states execceds the maximum limit!");
+  auto FollowSets = followSets(G);
+  for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
+    for (const Item &I : Graph.states()[SID].Items) {
+      // If we've just parsed the start symbol, we can accept the input.
+      if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
+        Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
+        continue;
+      }
+      if (!I.hasNext()) {
+        // If we've reached the end of a rule A := ..., then we can reduce.
+        Build.insert({SID, tokenSymbol(tok::eod), Action::reduce(I.rule())});
+      }
+    }
+  }
+  return std::move(Build).build(G.table(), Graph.states().size());
+}
+
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -62,6 +62,22 @@
         llvm_unreachable("unexpected action kind!");
       }
     }
+    for (const auto &Action :
+         Params.Table.getActions(Head->State, tokenSymbol(tok::eod))) {
+      switch (Action.kind()) {
+      case LRTable::Action::Shift:
+        PendingShift.push_back({Head, Action});
+        break;
+      case LRTable::Action::Reduce:
+        PendingReduce.push_back({Head, Action});
+        break;
+      case LRTable::Action::Accept:
+        PendingAccept.push_back({Head, Action});
+        break;
+      default:
+        llvm_unreachable("unexpected action kind!");
+      }
+    }
   };
   std::vector<const GSS::Node *> NewHeads = {
       GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol),
Index: clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -154,6 +154,7 @@
 
   // Build a SLR(1) parsing table.
   static LRTable buildSLR(const Grammar &G);
+  static LRTable buildLR0(const Grammar &G);
 
   class Builder;
   // Represents an entry in the table, used for building the LRTable.
Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
===================================================================
--- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -142,6 +142,20 @@
 }
 BENCHMARK(glrParse);
 
+static void glrParseLR0(benchmark::State &State) {
+  LRTable Table = clang::pseudo::LRTable::buildLR0(*G);
+  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  TokenStream Stream = lexAndPreprocess();
+  for (auto _ : State) {
+    pseudo::ForestArena Forest;
+    pseudo::GSS GSS;
+    pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
+  }
+  State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
+                          SourceText->size());
+}
+BENCHMARK(glrParseLR0);
+
 static void full(benchmark::State &State) {
   LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
   SymbolID StartSymbol = *G->findNonterminal("translation-unit");

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D127357: [pseudo] wip/prototype: use LR0 instead of SLR1 table

Reply via email to