hokein updated this revision to Diff 440366.
hokein added a comment.

remove a dependency from pseudoCLI lib


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128679/new/

https://reviews.llvm.org/D128679

Files:
  clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
  clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
  clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
  clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
  clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
  clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
  clang-tools-extra/pseudo/lib/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cli/CLI.cpp
  clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/tool/CMakeLists.txt
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/ParseLang.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TokenKinds.h"
@@ -47,9 +48,15 @@
 public:
   void build(llvm::StringRef GrammarBNF) {
     std::vector<std::string> Diags;
-    G = Grammar::parseBNF(GrammarBNF, Diags);
+    TestLang.G = Grammar::parseBNF(GrammarBNF, Diags);
   }
 
+  TokenStream emptyTokenStream() { // Returns a stream with no tokens added.
+    TokenStream Empty;
+    Empty.finalize(); // Finalized immediately, before anything is pushed.
+    return Empty;
+  }
+ 
   void buildGrammar(std::vector<std::string> Nonterminals,
                     std::vector<std::string> Rules) {
     Nonterminals.push_back("_");
@@ -65,19 +72,22 @@
 
   SymbolID id(llvm::StringRef Name) const {
     for (unsigned I = 0; I < NumTerminals; ++I)
-      if (G->table().Terminals[I] == Name)
+      if (TestLang.G->table().Terminals[I] == Name)
         return tokenSymbol(static_cast<tok::TokenKind>(I));
-    for (SymbolID ID = 0; ID < G->table().Nonterminals.size(); ++ID)
-      if (G->table().Nonterminals[ID].Name == Name)
+    for (SymbolID ID = 0; ID < TestLang.G->table().Nonterminals.size(); ++ID)
+      if (TestLang.G->table().Nonterminals[ID].Name == Name)
         return ID;
     ADD_FAILURE() << "No such symbol found: " << Name;
     return 0;
   }
 
   RuleID ruleFor(llvm::StringRef NonterminalName) const {
-    auto RuleRange = G->table().Nonterminals[id(NonterminalName)].RuleRange;
+    auto RuleRange =
+        TestLang.G->table().Nonterminals[id(NonterminalName)].RuleRange;
     if (RuleRange.End - RuleRange.Start == 1)
-      return G->table().Nonterminals[id(NonterminalName)].RuleRange.Start;
+      return TestLang.G->table()
+          .Nonterminals[id(NonterminalName)]
+          .RuleRange.Start;
     ADD_FAILURE() << "Expected a single rule for " << NonterminalName
                   << ", but it has " << RuleRange.End - RuleRange.Start
                   << " rule!\n";
@@ -85,7 +95,7 @@
   }
 
 protected:
-  std::unique_ptr<Grammar> G;
+  ParseLang TestLang;
   ForestArena Arena;
   GSS GSStack;
 };
@@ -111,8 +121,8 @@
                                    /*Parents=*/{GSSNode0});
 
   buildGrammar({}, {}); // Create a fake empty grammar.
-  LRTable T =
-      LRTable::buildForTests(G->table(), /*Entries=*/{
+  TestLang.Table =
+      LRTable::buildForTests(TestLang.G->table(), /*Entries=*/{
                                  {1, tokenSymbol(tok::semi), Action::shift(4)},
                                  {2, tokenSymbol(tok::semi), Action::shift(4)},
                                  {3, tokenSymbol(tok::semi), Action::shift(5)},
@@ -121,7 +131,7 @@
   ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
   std::vector<const GSS::Node *> NewHeads;
   glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
-           {*G, T, Arena, GSStack}, NewHeads);
+           {*TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
 
   EXPECT_THAT(NewHeads,
               UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@@ -141,8 +151,8 @@
   buildGrammar({"class-name", "enum-name"},
                {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
 
-  LRTable Table = LRTable::buildForTests(
-      G->table(), {
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G->table(), {
                       {/*State=*/0, id("class-name"), Action::goTo(2)},
                       {/*State=*/0, id("enum-name"), Action::goTo(3)},
                       {/*State=*/1, tokenSymbol(tok::l_brace),
@@ -157,7 +167,7 @@
       GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
 
   std::vector<const GSS::Node *> Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::l_brace), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::l_brace), {*TestLang.G, TestLang.Table, Arena, GSStack});
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode1,
                          AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -188,8 +198,8 @@
       /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
       /*Parents=*/{GSSNode2, GSSNode3});
 
-  LRTable Table = LRTable::buildForTests(
-      G->table(),
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G->table(),
       {
           {/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
           {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
@@ -197,7 +207,7 @@
            Action::reduce(ruleFor("ptr-operator"))},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::identifier), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::identifier), {*TestLang.G, TestLang.Table, Arena, GSStack});
 
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode4,
@@ -241,8 +251,8 @@
                       /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  LRTable Table = LRTable::buildForTests(
-      G->table(),
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G->table(),
       {
           {/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
           {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
@@ -252,7 +262,7 @@
            Action::reduce(/* type-name := enum-name */ 1)},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::l_paren), {*TestLang.G, TestLang.Table, Arena, GSStack});
 
   // Verify that the stack heads are joint at state 5 after reduces.
   EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -261,7 +271,7 @@
                                                 parents({GSSNode1, GSSNode2}))))
       << Heads;
   // Verify that we create an ambiguous ForestNode of two parses of `type-name`.
-  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
+  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*TestLang.G),
             "[  1, end) type-name := <ambiguous>\n"
             "[  1, end) ├─type-name := class-name\n"
             "[  1, end) │ └─class-name := <opaque>\n"
@@ -299,8 +309,8 @@
                       /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  LRTable Table = LRTable::buildForTests(
-      G->table(), {
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G->table(), {
                       {/*State=*/0, id("pointer"), Action::goTo(5)},
                       {3, tokenSymbol(tok::l_paren),
                        Action::reduce(/* pointer := class-name */ 0)},
@@ -308,14 +318,14 @@
                        Action::reduce(/* pointer := enum-name */ 1)},
                   });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::l_paren), {*TestLang.G, TestLang.Table, Arena, GSStack});
 
   EXPECT_THAT(
       Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
                                   AllOf(state(5), parsedSymbolID(id("pointer")),
                                         parents({GSSNode0}))))
       << Heads;
-  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
+  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*TestLang.G),
             "[  0, end) pointer := <ambiguous>\n"
             "[  0, end) ├─pointer := class-name *\n"
             "[  0,   1) │ ├─class-name := <opaque>\n"
@@ -342,15 +352,15 @@
     left-paren := {
     expr := IDENTIFIER
   )bnf");
+  TestLang.Table = LRTable::buildSLR(*TestLang.G);
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+      glrParse(Tokens, {*TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
   // Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
   // in the forest, see the `#1` and `=#1` in the dump string.
-  EXPECT_EQ(Parsed.dumpRecursive(*G),
+  EXPECT_EQ(Parsed.dumpRecursive(*TestLang.G),
             "[  0, end) test := <ambiguous>\n"
             "[  0, end) ├─test := { expr\n"
             "[  0,   1) │ ├─{ := tok[0]\n"
@@ -382,11 +392,11 @@
   )bnf");
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
+  TestLang.Table = LRTable::buildSLR(*TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
-  EXPECT_EQ(Parsed.dumpRecursive(*G),
+      glrParse(Tokens, {*TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*TestLang.G),
             "[  0, end) test := <ambiguous>\n"
             "[  0, end) ├─test := IDENTIFIER\n"
             "[  0, end) │ └─IDENTIFIER := tok[0]\n"
@@ -407,11 +417,11 @@
   // of the nonterminal `test` when the next token is `eof`, verify that the
   // parser stops at the right state.
   const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
+  TestLang.Table = LRTable::buildSLR(*TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
-  EXPECT_EQ(Parsed.dumpRecursive(*G),
+      glrParse(Tokens, {*TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*TestLang.G),
             "[  0, end) test := IDENTIFIER test\n"
             "[  0,   1) ├─IDENTIFIER := tok[0]\n"
             "[  1, end) └─test := IDENTIFIER\n"
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -9,7 +9,9 @@
 #include "clang-pseudo/Bracket.h"
 #include "clang-pseudo/DirectiveTree.h"
 #include "clang-pseudo/GLR.h"
+#include "clang-pseudo/ParseLang.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
@@ -20,14 +22,11 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Signals.h"
 
-using clang::pseudo::Grammar;
 using clang::pseudo::TokenStream;
 using llvm::cl::desc;
 using llvm::cl::init;
 using llvm::cl::opt;
 
-static opt<std::string>
-    Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
 static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
 static opt<bool> PrintGraph("print-graph",
                             desc("Print the LR graph for the grammar"));
@@ -93,49 +92,40 @@
     pairBrackets(*ParseableStream);
   }
 
-  if (Grammar.getNumOccurrences()) {
-    std::string Text = readOrDie(Grammar);
-    std::vector<std::string> Diags;
-    auto G = Grammar::parseBNF(Text, Diags);
+  const auto &Lang = clang::pseudo::getParseLangFromFlags();
+  if (PrintGrammar)
+    llvm::outs() << Lang.G->dump();
+  if (PrintGraph)
+    llvm::outs() << clang::pseudo::LRGraph::buildLR0(*Lang.G).dumpForTests(
+        *Lang.G);
 
-    if (!Diags.empty()) {
-      llvm::errs() << llvm::join(Diags, "\n");
+  if (PrintTable)
+    llvm::outs() << Lang.Table.dumpForTests(*Lang.G);
+  if (PrintStatistics)
+    llvm::outs() << Lang.Table.dumpStatistics();
+
+  if (ParseableStream) {
+    clang::pseudo::ForestArena Arena;
+    clang::pseudo::GSS GSS;
+    llvm::Optional<clang::pseudo::SymbolID> StartSymID =
+        Lang.G->findNonterminal(StartSymbol);
+    if (!StartSymID) {
+      llvm::errs() << llvm::formatv(
+          "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
       return 2;
     }
-    llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
-                                  Grammar);
-    if (PrintGrammar)
-      llvm::outs() << G->dump();
-    if (PrintGraph)
-      llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G);
-    auto LRTable = clang::pseudo::LRTable::buildSLR(*G);
-    if (PrintTable)
-      llvm::outs() << LRTable.dumpForTests(*G);
-    if (PrintStatistics)
-      llvm::outs() << LRTable.dumpStatistics();
-
-    if (ParseableStream) {
-      clang::pseudo::ForestArena Arena;
-      clang::pseudo::GSS GSS;
-      llvm::Optional<clang::pseudo::SymbolID> StartSymID =
-          G->findNonterminal(StartSymbol);
-      if (!StartSymID) {
-        llvm::errs() << llvm::formatv(
-            "The start symbol {0} doesn't exit in the grammar!\n", Grammar);
-        return 2;
-      }
-      auto &Root = glrParse(*ParseableStream,
-                            clang::pseudo::ParseParams{*G, LRTable, Arena, GSS},
-                            *StartSymID);
-      if (PrintForest)
-        llvm::outs() << Root.dumpRecursive(*G, /*Abbreviated=*/true);
+    auto &Root =
+        glrParse(*ParseableStream,
+                 clang::pseudo::ParseParams{*Lang.G, Lang.Table, Arena, GSS},
+                 *StartSymID);
+    if (PrintForest)
+      llvm::outs() << Root.dumpRecursive(*Lang.G, /*Abbreviated=*/true);
 
-      if (PrintStatistics) {
-        llvm::outs() << "Forest bytes: " << Arena.bytes()
-                     << " nodes: " << Arena.nodeCount() << "\n";
-        llvm::outs() << "GSS bytes: " << GSS.bytes()
-                     << " nodes: " << GSS.nodesCreated() << "\n";
-      }
+    if (PrintStatistics) {
+      llvm::outs() << "Forest bytes: " << Arena.bytes()
+                   << " nodes: " << Arena.nodeCount() << "\n";
+      llvm::outs() << "GSS bytes: " << GSS.bytes()
+                   << " nodes: " << GSS.nodesCreated() << "\n";
     }
   }
 
Index: clang-tools-extra/pseudo/tool/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/tool/CMakeLists.txt
+++ clang-tools-extra/pseudo/tool/CMakeLists.txt
@@ -13,5 +13,6 @@
   PRIVATE
   clangPseudo
   clangPseudoGrammar
+  clangPseudoCLI
   )
 
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,26 +7,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/ParseLang.h"
+#include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
+#include <utility>
 
 namespace clang {
 namespace pseudo {
 namespace cxx {
-
+namespace {
 static const char *CXXBNF =
 #include "CXXBNF.inc"
     ;
+} // namespace
 
-const Grammar &getGrammar() {
-  static std::vector<std::string> Diags;
-  static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release();
-  assert(Diags.empty());
-  return *G;
-}
-
-const LRTable &getLRTable() {
-  static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
-  return *Table;
+// Returns the ParseLang built from the grammar text in CXXBNF. It is created
+// on the first call, allocated with `new` and never deleted.
+const ParseLang &getParseLang() {
+  static const ParseLang &Instance = []() -> const ParseLang & {
+    std::vector<std::string> Errors;
+    auto BuiltinGrammar = Grammar::parseBNF(CXXBNF, Errors);
+    assert(Errors.empty());
+    // Construct the SLR table from the grammar before ownership of the
+    // grammar is handed over to the ParseLang.
+    LRTable SLR = LRTable::buildSLR(*BuiltinGrammar);
+    return *new ParseLang{std::move(BuiltinGrammar), std::move(SLR)};
+  }();
+  return Instance;
 }
 
 } // namespace cxx
Index: clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_library(clangPseudoCLI
+  CLI.cpp
+
+  LINK_LIBS
+  clangPseudoGrammar
+  clangPseudoCXX
+  )
Index: clang-tools-extra/pseudo/lib/cli/CLI.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -0,0 +1,52 @@
+//===--- CLI.cpp -  ----------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "clang-pseudo/cli/CLI.h"
+#include "clang-pseudo/ParseLang.h"
+#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/grammar/LRTable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <utility>
+
+static llvm::cl::opt<std::string> Grammar(
+    "grammar",
+    llvm::cl::desc(
+        "Specify a BNF grammar file path, or a builtin language (cxx)."),
+    llvm::cl::init("cxx"));
+
+namespace clang {
+namespace pseudo {
+
+// Returns the language chosen by --grammar: the builtin one for "cxx", else
+// one parsed from the BNF file at that path (built once, then cached).
+const ParseLang &getParseLangFromFlags() {
+  if (::Grammar == "cxx")
+    return cxx::getParseLang();
+  static ParseLang *PLang = []() {
+    auto GrammarText = llvm::MemoryBuffer::getFile(::Grammar);
+    if (std::error_code EC = GrammarText.getError()) {
+      llvm::errs() << "Error: can't read grammar file '" << ::Grammar
+                   << "': " << EC.message() << "\n";
+      std::exit(1);
+    }
+    std::vector<std::string> Diags;
+    auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+    for (const auto &Diag : Diags)
+      llvm::errs() << Diag << "\n";
+    // Build the table before G is moved into the ParseLang (G is null after).
+    LRTable Table = LRTable::buildSLR(*G);
+    return new ParseLang{std::move(G), std::move(Table)};
+  }();
+  return *PLang;
+}
+
+} // namespace pseudo
+} // namespace clang
Index: clang-tools-extra/pseudo/lib/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(cli)
 add_subdirectory(cxx)
 add_subdirectory(grammar)
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -23,12 +23,11 @@
 #ifndef CLANG_PSEUDO_CXX_CXX_H
 #define CLANG_PSEUDO_CXX_CXX_H
 
+#include "clang-pseudo/ParseLang.h"
 #include "clang-pseudo/grammar/Grammar.h"
 
 namespace clang {
 namespace pseudo {
-class LRTable;
-
 namespace cxx {
 // Symbol represents nonterminal symbols in the C++ grammar.
 // It provides a simple uniform way to access a particular nonterminal.
@@ -38,10 +37,7 @@
 #undef NONTERMINAL
 };
 
-// Returns the C++ grammar.
-const Grammar &getGrammar();
-// Returns the corresponding LRTable for the C++ grammar.
-const LRTable &getLRTable();
+const ParseLang &getParseLang();
 
 } // namespace cxx
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
@@ -0,0 +1,32 @@
+//===--- CLI.h - Get grammar from various sources ----------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A library shared among different pseudoparser-based tools. It provides a
+// uniform way to get basic pieces of the parser (Grammar, LRTable etc) from
+// various grammar sources.
+// It defines a `--grammar` CLI flag, which supports 1) using a grammar from a
+// file (--grammar=/path/to/lang.bnf) or 2) using the prebuilt cxx language
+// (--grammar=cxx).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CLI_CLI_H
+#define CLANG_PSEUDO_CLI_CLI_H
+
+#include "clang-pseudo/ParseLang.h"
+
+namespace clang {
+namespace pseudo {
+
+// Returns the corresponding language from the '--grammar' command-line flag.
+const ParseLang &getParseLangFromFlags();
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CLI_CLI_H
Index: clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
@@ -0,0 +1,31 @@
+//===--- ParseLang.h ------------------------------------------- -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_PARSELANG_H
+#define CLANG_PSEUDO_PARSELANG_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/grammar/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+// Specify a language that can be parsed by the pseudoparser.
+// Manifest generated from a bnf grammar file.
+struct ParseLang {
+  std::unique_ptr<Grammar> G; // Grammar of the language; owned by this struct.
+  LRTable Table; // Parse table paired with G.
+
+  // FIXME: add clang::LangOptions.
+  // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_PARSELANG_H
Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -112,6 +112,7 @@
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
 
 // Parameters for the GLR parsing.
+// FIXME: refine it with the ParseLang struct.
 struct ParseParams {
   // The grammar of the language we're going to parse.
   const Grammar &G;
Index: clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
===================================================================
--- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -10,6 +10,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -24,28 +25,10 @@
 
 class Fuzzer {
   clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
-  std::unique_ptr<Grammar> G;
-  LRTable T;
   bool Print;
 
 public:
-  Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
-    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
-        llvm::MemoryBuffer::getFile(GrammarPath);
-    if (std::error_code EC = GrammarText.getError()) {
-      llvm::errs() << "Error: can't read grammar file '" << GrammarPath
-                   << "': " << EC.message() << "\n";
-      std::exit(1);
-    }
-    std::vector<std::string> Diags;
-    G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
-    if (!Diags.empty()) {
-      for (const auto &Diag : Diags)
-        llvm::errs() << Diag << "\n";
-      std::exit(1);
-    }
-    T = LRTable::buildSLR(*G);
-  }
+  Fuzzer(bool Print) : Print(Print) {}
 
   void operator()(llvm::StringRef Code) {
     std::string CodeStr = Code.str(); // Must be null-terminated.
@@ -58,11 +41,13 @@
 
     clang::pseudo::ForestArena Arena;
     clang::pseudo::GSS GSS;
+    const ParseLang &PLang = getParseLangFromFlags();
     auto &Root =
-        glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS},
-                 *G->findNonterminal("translation-unit"));
+        glrParse(ParseableStream,
+                 clang::pseudo::ParseParams{*PLang.G, PLang.Table, Arena, GSS},
+                 *PLang.G->findNonterminal("translation-unit"));
     if (Print)
-      llvm::outs() << Root.dumpRecursive(*G);
+      llvm::outs() << Root.dumpRecursive(*PLang.G);
   }
 };
 
@@ -75,16 +60,11 @@
 extern "C" {
 
 // Set up the fuzzer from command line flags:
-//  -grammar=<file> (required) - path to cxx.bnf
 //  -print                     - used for testing the fuzzer
 int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
-  llvm::StringRef GrammarFile;
   bool PrintForest = false;
   auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
-    if (Arg.consume_front("-grammar=")) {
-      GrammarFile = Arg;
-      return true;
-    } else if (Arg == "-print") {
+    if (Arg == "-print") {
       PrintForest = true;
       return true;
     }
@@ -92,11 +72,7 @@
   };
   *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
 
-  if (GrammarFile.empty()) {
-    fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
-    exit(1);
-  }
-  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
+  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
   return 0;
 }
 
Index: clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
+++ clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -11,5 +11,6 @@
 target_link_libraries(clang-pseudo-fuzzer
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   )
Index: clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
+++ clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
@@ -3,6 +3,7 @@
 target_link_libraries(ClangPseudoBenchmark
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   LLVMSupport
   )
Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
===================================================================
--- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -25,6 +25,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -39,9 +40,6 @@
 using llvm::cl::opt;
 using llvm::cl::Required;
 
-static opt<std::string> GrammarFile("grammar",
-                                    desc("Parse and check a BNF grammar file."),
-                                    Required);
 static opt<std::string> Source("source", desc("Source file"), Required);
 
 namespace clang {
@@ -49,11 +47,10 @@
 namespace bench {
 namespace {
 
-const std::string *GrammarText = nullptr;
 const std::string *SourceText = nullptr;
-const Grammar *G = nullptr;
+const ParseLang *PLang = nullptr;
 
-void setupGrammarAndSource() {
+void setup() {
   auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
         llvm::MemoryBuffer::getFile(FilePath);
@@ -64,22 +61,13 @@
     }
     return GrammarText.get()->getBuffer().str();
   };
-  GrammarText = new std::string(ReadFile(GrammarFile));
   SourceText = new std::string(ReadFile(Source));
-  std::vector<std::string> Diags;
-  G = Grammar::parseBNF(*GrammarText, Diags).release();
+  PLang = &getParseLangFromFlags();
 }
 
-static void parseBNF(benchmark::State &State) {
-  std::vector<std::string> Diags;
-  for (auto _ : State)
-    Grammar::parseBNF(*GrammarText, Diags);
-}
-BENCHMARK(parseBNF);
-
 static void buildSLR(benchmark::State &State) {
   for (auto _ : State)
-    LRTable::buildSLR(*G);
+    LRTable::buildSLR(*PLang->G);
 }
 BENCHMARK(buildSLR);
 
@@ -129,13 +117,13 @@
 BENCHMARK(preprocess);
 
 static void glrParse(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *PLang->G->findNonterminal("translation-unit");
   TokenStream Stream = lexAndPreprocess();
   for (auto _ : State) {
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
+    pseudo::glrParse(Stream, ParseParams{*PLang->G, PLang->Table, Forest, GSS},
+                     StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());
@@ -143,13 +131,13 @@
 BENCHMARK(glrParse);
 
 static void full(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *PLang->G->findNonterminal("translation-unit");
   for (auto _ : State) {
     TokenStream Stream = lexAndPreprocess();
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
+    pseudo::glrParse(lexAndPreprocess(),
+                     ParseParams{*PLang->G, PLang->Table, Forest, GSS},
                      StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
@@ -165,7 +153,7 @@
 int main(int argc, char *argv[]) {
   benchmark::Initialize(&argc, argv);
   llvm::cl::ParseCommandLineOptions(argc, argv);
-  clang::pseudo::bench::setupGrammarAndSource();
+  clang::pseudo::bench::setup();
   benchmark::RunSpecifiedBenchmarks();
   return 0;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to