sammccall created this revision. sammccall added a reviewer: hokein. Herald added a project: All. sammccall requested review of this revision. Herald added subscribers: cfe-commits, alextsao1999. Herald added a project: clang-tools-extra.
This ignores the lookahead token when deciding what to reduce, we always reduce unconditionally. The main effects are: - all potential partial parses are visible e.g. to error recovery. (In error scenarios, the intended reduction may not occur with SLR1 when the error occurs at the lookahead token). - the size of the LR table is much smaller (In this patch we go from 340kB => 120kB, and the encoding isn't efficient) - we explore more dead-ends due to lack of lookahead, and parser runs slower - with this patch, the LR0 table parses ~1/3 slower than the SLR1. (LR0 allows code simplifications so we may win some of this back) This patch uses `eod` as a dummy lookahead token for reduce actions: i.e. reduces are Actions[State, eod]. The structure of glrParse is preserved: we still track heads as "pending actions". To allow either LR0 or SLR1 to be used, we perform lookups for both the actual lookahead token and eod. In a real implementation we'd probably want to: - switch glrParse to track head nodes, instead of pending actions on them. This should simplify the code there somewhat. - use a separate storage in LRTable for reduce actions, like a parallel vector<RuleID> (sorted by stateid) and vector<size_t> (indexed by stateid) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D127357 Files: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp clang-tools-extra/pseudo/test/lr-build-basic.test clang-tools-extra/pseudo/test/lr-build-conflicts.test clang-tools-extra/pseudo/tool/ClangPseudo.cpp
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp =================================================================== --- clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -108,7 +108,7 @@ llvm::outs() << G->dump(); if (PrintGraph) llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G); - auto LRTable = clang::pseudo::LRTable::buildSLR(*G); + auto LRTable = clang::pseudo::LRTable::buildLR0(*G); if (PrintTable) llvm::outs() << LRTable.dumpForTests(*G); if (PrintStatistics) Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test =================================================================== --- clang-tools-extra/pseudo/test/lr-build-conflicts.test +++ clang-tools-extra/pseudo/test/lr-build-conflicts.test @@ -36,12 +36,10 @@ # TABLE-NEXT: 'EOF': accept # TABLE-NEXT: '-': shift state 3 # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER' -# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER' +# TABLE-NEXT: 'EOD': reduce by rule 1 'expr := IDENTIFIER' # TABLE-NEXT: State 3 # TABLE-NEXT: 'IDENTIFIER': shift state 2 # TABLE-NEXT: 'expr': go to state 4 # TABLE-NEXT: State 4 -# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr' +# TABLE-NEXT: 'EOD': reduce by rule 0 'expr := expr - expr' # TABLE-NEXT: '-': shift state 3 -# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr' Index: clang-tools-extra/pseudo/test/lr-build-basic.test =================================================================== --- clang-tools-extra/pseudo/test/lr-build-basic.test +++ clang-tools-extra/pseudo/test/lr-build-basic.test @@ -24,6 +24,6 @@ # TABLE-NEXT: State 1 # TABLE-NEXT: 'EOF': accept # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id' +# TABLE-NEXT: 'EOD': reduce by rule 1 'expr := id' # TABLE-NEXT: State 3 -# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER' +# TABLE-NEXT: 'EOD': reduce by rule 0 'id := IDENTIFIER' Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp =================================================================== --- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -135,5 +135,31 @@ return std::move(Build).build(G.table(), Graph.states().size()); } +LRTable LRTable::buildLR0(const Grammar &G) { + auto Graph = LRGraph::buildLR0(G); + Builder Build(Graph.startStates()); + for (const auto &T : Graph.edges()) { + Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst); + Build.insert({T.Src, T.Label, Act}); + } + assert(Graph.states().size() <= (1 << StateBits) && + "Graph states execceds the maximum limit!"); + auto FollowSets = followSets(G); + for (StateID SID = 0; SID < Graph.states().size(); ++SID) { + for (const Item &I : Graph.states()[SID].Items) { + // If we've just parsed the start symbol, we can accept the input. + if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) { + Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())}); + continue; + } + if (!I.hasNext()) { + // If we've reached the end of a rule A := ..., then we can reduce. + Build.insert({SID, tokenSymbol(tok::eod), Action::reduce(I.rule())}); + } + } + } + return std::move(Build).build(G.table(), Graph.states().size()); +} + } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/lib/GLR.cpp =================================================================== --- clang-tools-extra/pseudo/lib/GLR.cpp +++ clang-tools-extra/pseudo/lib/GLR.cpp @@ -62,6 +62,22 @@ llvm_unreachable("unexpected action kind!"); } } + for (const auto &Action : + Params.Table.getActions(Head->State, tokenSymbol(tok::eod))) { + switch (Action.kind()) { + case LRTable::Action::Shift: + PendingShift.push_back({Head, Action}); + break; + case LRTable::Action::Reduce: + PendingReduce.push_back({Head, Action}); + break; + case LRTable::Action::Accept: + PendingAccept.push_back({Head, Action}); + break; + default: + llvm_unreachable("unexpected action kind!"); + } + } }; std::vector<const GSS::Node *> NewHeads = { GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol), Index: clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h +++ clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h @@ -154,6 +154,7 @@ // Build a SLR(1) parsing table. static LRTable buildSLR(const Grammar &G); + static LRTable buildLR0(const Grammar &G); class Builder; // Represents an entry in the table, used for building the LRTable. Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp =================================================================== --- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -142,6 +142,20 @@ } BENCHMARK(glrParse); +static void glrParseLR0(benchmark::State &State) { + LRTable Table = clang::pseudo::LRTable::buildLR0(*G); + SymbolID StartSymbol = *G->findNonterminal("translation-unit"); + TokenStream Stream = lexAndPreprocess(); + for (auto _ : State) { + pseudo::ForestArena Forest; + pseudo::GSS GSS; + pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol); + } + State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) * + SourceText->size()); +} +BENCHMARK(glrParseLR0); + static void full(benchmark::State &State) { LRTable Table = clang::pseudo::LRTable::buildSLR(*G); SymbolID StartSymbol = *G->findNonterminal("translation-unit");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits