[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-09 Thread Haojian Wu via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7a05942dd0c5: [pseudo] Remove the explicit Accept actions. 
(authored by hokein).

Changed prior to commit:
  https://reviews.llvm.org/D125677?vs=435309=435459#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
  clang-tools-extra/pseudo/test/lr-build-basic.test
  clang-tools-extra/pseudo/test/lr-build-conflicts.test
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/LRTableTest.cpp

Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -33,7 +33,7 @@
   std::vector Entries = {
   {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
   {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
-  {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
+  {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
   {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
   GrammarTable GT;
   LRTable T = LRTable::buildForTests(GT, Entries);
@@ -41,7 +41,7 @@
   EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
-  UnorderedElementsAre(Action::accept(2)));
+  UnorderedElementsAre(Action::reduce(2)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
   EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::reduce(1)));
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -393,6 +393,29 @@
 "[  0, end) └─IDENTIFIER := tok[0]\n");
 }
 
+TEST_F(GLRTest, NoExplicitAccept) {
+  build(R"bnf(
+_ := test
+
+test := IDENTIFIER test
+test := IDENTIFIER
+  )bnf");
+  clang::LangOptions LOptions;
+  // Given the following input, and the grammar above, we perform two reductions
+  // of the nonterminal `test` when the next token is `eof`, verify that the
+  // parser stops at the right state.
+  const TokenStream  = cook(lex("id id", LOptions), LOptions);
+  auto LRTable = LRTable::buildSLR(*G);
+
+  const ForestNode  =
+  glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*G),
+"[  0, end) test := IDENTIFIER test\n"
+"[  0,   1) ├─IDENTIFIER := tok[0]\n"
+"[  1, end) └─test := IDENTIFIER\n"
+"[  1, end)   └─IDENTIFIER := tok[1]\n");
+}
+
 TEST(GSSTest, GC) {
   //  ┌-A-┬-AB
   //  ├-B-┘
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -33,7 +33,6 @@
 # TABLE-NEXT: 'IDENTIFIER': shift state 2
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: '-': shift state 3
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -22,7 +22,6 @@
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: 'id': go to state 2
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -117,11 +117,11 @@
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
 for (const Item  : Graph.states()[SID].Items) {
-  // If we've just parsed the start symbol, we can accept the input.
-  if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
-Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
+  // If we've just parsed the start symbol, this means we successfully parse
+ 

[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-09 Thread Haojian Wu via Phabricator via cfe-commits
hokein added inline comments.



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:86
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {

sammccall wrote:
> hokein wrote:
> > sammccall wrote:
> > > this line introduces a requirement that the start symbol be a nonterminal 
> > > - if this is new, can we doc it?
> > this is not a new requirement, it is rather implicit previously (we use 
> > `G.findNonterminal` to pass the start-symbol argument of the `glrParse`). I 
> > added an assertion in the glrParse.
> > 
> > It would be nice to diagnose it in the bnf grammar parsing time (will add a 
> > followup patch).
> FWIW I think an assertion (and possibly a comment) is the right tradeoff 
> here, maintaining diagnostics for all this stuff seems like overkill
Fair enough. I underestimated the effort of doing this stuff (we need to adjust 
a few existing tests in GrammarTests as well).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added inline comments.



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:86
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {

hokein wrote:
> sammccall wrote:
> > this line introduces a requirement that the start symbol be a nonterminal - 
> > if this is new, can we doc it?
> this is not a new requirement, it is rather implicit previously (we use 
> `G.findNonterminal` to pass the start-symbol argument of the `glrParse`). I 
> added an assertion in the glrParse.
> 
> It would be nice to diagnose it in the bnf grammar parsing time (will add a 
> followup patch).
FWIW I think an assertion (and possibly a comment) is the right tradeoff here, 
maintaining diagnostics for all this stuff seems like overkill


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Haojian Wu via Phabricator via cfe-commits
hokein added inline comments.



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:86
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {

sammccall wrote:
> this line introduces a requirement that the start symbol be a nonterminal - 
> if this is new, can we doc it?
this is not a new requirement, it is rather implicit previously (we use 
`G.findNonterminal` to pass the start-symbol argument of the `glrParse`). I 
added an assertion in the glrParse.

It would be nice to diagnose it in the bnf grammar parsing time (will add a 
followup patch).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Haojian Wu via Phabricator via cfe-commits
hokein updated this revision to Diff 435309.
hokein marked an inline comment as done.
hokein added a comment.

switch to the previous simple version


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
  clang-tools-extra/pseudo/test/lr-build-basic.test
  clang-tools-extra/pseudo/test/lr-build-conflicts.test
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/LRTableTest.cpp

Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -33,7 +33,7 @@
   std::vector Entries = {
   {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
   {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
-  {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
+  {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
   {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
   GrammarTable GT;
   LRTable T = LRTable::buildForTests(GT, Entries);
@@ -41,7 +41,7 @@
   EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
-  UnorderedElementsAre(Action::accept(2)));
+  UnorderedElementsAre(Action::reduce(2)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
   EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::reduce(1)));
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -393,6 +393,29 @@
 "[  0, end) └─IDENTIFIER := tok[0]\n");
 }
 
+TEST_F(GLRTest, NoExplicitAccept) {
+  build(R"bnf(
+_ := test
+
+test := IDENTIFIER test
+test := IDENTIFIER
+  )bnf");
+  clang::LangOptions LOptions;
+  // Given the following input, and the grammar above, we perform two reductions
+  // of the nonterminal `test` when the next token is `eof`, verify that the
+  // parser stops at the right state.
+  const TokenStream  = cook(lex("id id", LOptions), LOptions);
+  auto LRTable = LRTable::buildSLR(*G);
+
+  const ForestNode  =
+  glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*G),
+"[  0, end) test := IDENTIFIER test\n"
+"[  0,   1) ├─IDENTIFIER := tok[0]\n"
+"[  1, end) └─test := IDENTIFIER\n"
+"[  1, end)   └─IDENTIFIER := tok[1]\n");
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -33,7 +33,6 @@
 # TABLE-NEXT: 'IDENTIFIER': shift state 2
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: '-': shift state 3
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -22,7 +22,6 @@
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: 'id': go to state 2
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -124,11 +124,11 @@
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
 for (const Item  : Graph.states()[SID].Items) {
-  // If we've just parsed the start symbol, we can accept the input.
-  if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
-Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
+  // If we've just parsed the start symbol, this means we successfully parse
+  // the input. We don't add the reduce action of `_ := start_symbol` in the
+  // LRTable (the GLR parser handles it specifically).
+  if 

[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added inline comments.



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:85
 
-  if (!PendingAccept.empty()) {
-LLVM_DEBUG({
-  llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n",
- PendingAccept.size());
-  for (const auto  : PendingAccept)
-llvm::dbgs() << "  - " << G.symbolName(Accept.Head->Payload->symbol())
- << "\n";
-});
-assert(PendingAccept.size() == 1);
-return *PendingAccept.front().Head->Payload;
-  }
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);

hokein wrote:
> sammccall wrote:
> > rather than mingling this with the glrReduce, I'd suggest collecting the 
> > set of final heads first and then analyzing them afterwards.
> > 
> > This means looping a second time, but I think the logic around recognizing 
> > patterns that look like `accept` might grow (e.g. if we want to incorporate 
> > some error tolerance)
> that sounds reasonable, but it requires some additional changes (to identify 
> inactive heads in the callback of `glrReduce`):
> 
> - AddSteps now returns true/false whether there is any actions in 
> LRTable[NewHead->state][nextTok];
> - no available action in the acceptable state on the eof token in LRTable 
> (see the change in lr-build-basic.test);
Hmm, I'm not sure whether the conditional or unconditional version is better, 
and we're really borrowing problems from the future here.

Let's go back to this simplest version, and update the opaque forest node 
comment below to a FIXME.
(We will need to invoke our generic error-recovery handlers when we reach EOF 
without reaching accept state, and simulating shifting an eof token may be the 
best way to reuse the code)



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:86
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {

this line introduces a requirement that the start symbol be a nonterminal - if 
this is new, can we doc it?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Haojian Wu via Phabricator via cfe-commits
hokein added inline comments.



Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:85
 
-  if (!PendingAccept.empty()) {
-LLVM_DEBUG({
-  llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n",
- PendingAccept.size());
-  for (const auto  : PendingAccept)
-llvm::dbgs() << "  - " << G.symbolName(Accept.Head->Payload->symbol())
- << "\n";
-});
-assert(PendingAccept.size() == 1);
-return *PendingAccept.front().Head->Payload;
-  }
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);

sammccall wrote:
> rather than mingling this with the glrReduce, I'd suggest collecting the set 
> of final heads first and then analyzing them afterwards.
> 
> This means looping a second time, but I think the logic around recognizing 
> patterns that look like `accept` might grow (e.g. if we want to incorporate 
> some error tolerance)
that sounds reasonable, but it requires some additional changes (to identify 
inactive heads in the callback of `glrReduce`):

- AddSteps now returns true/false whether there is any actions in 
LRTable[NewHead->state][nextTok];
- no available action in the acceptable state on the eof token in LRTable (see 
the change in lr-build-basic.test);


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-08 Thread Haojian Wu via Phabricator via cfe-commits
hokein updated this revision to Diff 435103.
hokein added a comment.

pull out the logic of detecting acceptable state out of glrReduce callback.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
  clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
  clang-tools-extra/pseudo/test/lr-build-basic.test
  clang-tools-extra/pseudo/test/lr-build-conflicts.test
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/LRTableTest.cpp

Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -33,7 +33,7 @@
   std::vector Entries = {
   {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
   {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
-  {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
+  {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
   {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
   GrammarTable GT;
   LRTable T = LRTable::buildForTests(GT, Entries);
@@ -41,7 +41,7 @@
   EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
-  UnorderedElementsAre(Action::accept(2)));
+  UnorderedElementsAre(Action::reduce(2)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
   EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::reduce(1)));
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -393,6 +393,29 @@
 "[  0, end) └─IDENTIFIER := tok[0]\n");
 }
 
+TEST_F(GLRTest, NoExplicitAccept) {
+  build(R"bnf(
+_ := test
+
+test := IDENTIFIER test
+test := IDENTIFIER
+  )bnf");
+  clang::LangOptions LOptions;
+  // Given the following input, and the grammar above, we perform two reductions
+  // of the nonterminal `test` when the next token is `eof`, verify that the
+  // parser stops at the right state.
+  const TokenStream  = cook(lex("id id", LOptions), LOptions);
+  auto LRTable = LRTable::buildSLR(*G);
+
+  const ForestNode  =
+  glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*G),
+"[  0, end) test := IDENTIFIER test\n"
+"[  0,   1) ├─IDENTIFIER := tok[0]\n"
+"[  1, end) └─test := IDENTIFIER\n"
+"[  1, end)   └─IDENTIFIER := tok[1]\n");
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -33,7 +33,6 @@
 # TABLE-NEXT: 'IDENTIFIER': shift state 2
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: '-': shift state 3
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -22,7 +22,6 @@
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: 'id': go to state 2
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -124,11 +124,11 @@
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
 for (const Item  : Graph.states()[SID].Items) {
-  // If we've just parsed the start symbol, we can accept the input.
-  if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
-Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
+  // If we've just parsed the start symbol, this means we successfully parse
+  // the input. We don't add the reduce action of `_ := start_symbol` in the
+  // LRTable (the GLR parser handles it specifically).
+  if 

[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-06-07 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

This is a great simplification, thanks!




Comment at: clang-tools-extra/pseudo/lib/GLR.cpp:85
 
-  if (!PendingAccept.empty()) {
-LLVM_DEBUG({
-  llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n",
- PendingAccept.size());
-  for (const auto  : PendingAccept)
-llvm::dbgs() << "  - " << G.symbolName(Accept.Head->Payload->symbol())
- << "\n";
-});
-assert(PendingAccept.size() == 1);
-return *PendingAccept.front().Head->Payload;
-  }
+  const ForestNode *Result = nullptr;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);

rather than mingling this with the glrReduce, I'd suggest collecting the set of 
final heads first and then analyzing them afterwards.

This means looping a second time, but I think the logic around recognizing 
patterns that look like `accept` might grow (e.g. if we want to incorporate 
some error tolerance)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125677/new/

https://reviews.llvm.org/D125677

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125677: [pseudo] Remove the explicit Accept actions.

2022-05-16 Thread Haojian Wu via Phabricator via cfe-commits
hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.

As pointed out in the previous review section, having a dedicated accept
action doesn't seem to be necessary. This patch implements the the same behavior
without accept acction, which will save some code complexity.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125677

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/LRTable.cpp
  clang-tools-extra/pseudo/lib/LRTableBuild.cpp
  clang-tools-extra/pseudo/test/lr-build-basic.test
  clang-tools-extra/pseudo/test/lr-build-conflicts.test
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/LRTableTest.cpp

Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -33,7 +33,7 @@
   std::vector Entries = {
   {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
   {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
-  {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
+  {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
   {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
   GrammarTable GT;
   LRTable T = LRTable::buildForTests(GT, Entries);
@@ -41,7 +41,7 @@
   EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
-  UnorderedElementsAre(Action::accept(2)));
+  UnorderedElementsAre(Action::reduce(2)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
   EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
   UnorderedElementsAre(Action::reduce(1)));
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -393,6 +393,29 @@
 "[  0, end) └─IDENTIFIER := tok[0]\n");
 }
 
+TEST_F(GLRTest, NoExplicitAccept) {
+  build(R"bnf(
+_ := test
+
+test := IDENTIFIER test
+test := IDENTIFIER
+  )bnf");
+  clang::LangOptions LOptions;
+  // Given the following input, and the grammar above, we perform two reductions
+  // of the nonterminal `test` when the next token is `eof`, verify that the
+  // parser stops at the right state.
+  const TokenStream  = cook(lex("id id", LOptions), LOptions);
+  auto LRTable = LRTable::buildSLR(*G);
+
+  const ForestNode  =
+  glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*G),
+"[  0, end) test := IDENTIFIER test\n"
+"[  0,   1) ├─IDENTIFIER := tok[0]\n"
+"[  1, end) └─test := IDENTIFIER\n"
+"[  1, end)   └─IDENTIFIER := tok[1]\n");
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -33,7 +33,7 @@
 # TABLE-NEXT: 'IDENTIFIER': shift state 2
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
+# TABLE-NEXT: 'EOF': reduce by rule 2 '_ := expr'
 # TABLE-NEXT: '-': shift state 3
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -22,7 +22,7 @@
 # TABLE-NEXT: 'expr': go to state 1
 # TABLE-NEXT: 'id': go to state 2
 # TABLE-NEXT: State 1
-# TABLE-NEXT: 'EOF': accept
+# TABLE-NEXT: 'EOF': reduce by rule 2 '_ := expr'
 # TABLE-NEXT: State 2
 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
Index: clang-tools-extra/pseudo/lib/LRTableBuild.cpp
===
--- clang-tools-extra/pseudo/lib/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/LRTableBuild.cpp
@@ -124,11 +124,6 @@
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
 for (const Item  : Graph.states()[SID].Items) {
-  // If we've just parsed the start symbol, we can accept the input.
-  if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
-Build.insert({SID,