Author: Haojian Wu
Date: 2022-07-22T09:13:09+02:00
New Revision: 2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d

URL: 
https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d
DIFF: 
https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.diff

LOG: [pseudo] Eliminate the dangling-else syntax ambiguity.

- the grammar ambiguity is eliminated by a guard;
- modify the guard function signatures: all parameters are now folded
  into a single object to avoid a long parameter list (as we will add
  more parameters in the near future).

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D130160

Added: 
    clang-tools-extra/pseudo/test/cxx/dangling-else.cpp

Modified: 
    clang-tools-extra/pseudo/include/clang-pseudo/Language.h
    clang-tools-extra/pseudo/lib/GLR.cpp
    clang-tools-extra/pseudo/lib/cxx/CXX.cpp
    clang-tools-extra/pseudo/lib/cxx/cxx.bnf
    clang-tools-extra/pseudo/unittests/GLRTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
index 3696543915cba..1a2b71f081da0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -19,6 +19,12 @@ class ForestNode;
 class TokenStream;
 class LRTable;
 
+struct GuardParams {
+  llvm::ArrayRef<const ForestNode *> RHS;
+  const TokenStream &Tokens;
+  // FIXME: use the index of Tokens.
+  SymbolID Lookahead;
+};
 // A guard restricts when a grammar rule can be used.
 //
 // The GLR parser will use the guard to determine whether a rule reduction will
@@ -26,8 +32,7 @@ class LRTable;
 // `virt-specifier := IDENTIFIER` only if the identifier's text is 'override`.
 //
 // Return true if the guard is satisfied.
-using RuleGuard = llvm::function_ref<bool(
-    llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
 
 // A recovery strategy determines a region of code to skip when parsing fails.
 //

diff  --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index df8381d04326b..ab230accdf8f8 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@ class GLRReduce {
     if (!R.Guarded)
       return true;
     if (auto Guard = Lang.Guards.lookup(RID))
-      return Guard(RHS, Params.Code);
+      return Guard({RHS, Params.Code, Lookahead});
     LLVM_DEBUG(llvm::dbgs()
                << llvm::formatv("missing guard implementation for rule {0}\n",
                                 Lang.G.dumpRule(RID)));

diff  --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 8fa24bfbbd0b5..7fc3a48d63189 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -156,15 +156,19 @@ bool isFunctionDeclarator(const ForestNode *Declarator) {
   llvm_unreachable("unreachable");
 }
 
+bool guardNextTokenNotElse(const GuardParams &P) {
+  return symbolToToken(P.Lookahead) != tok::kw_else;
+}
+
 llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
#define TOKEN_GUARD(kind, cond)                                                \
-  [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {      \
-    const Token &Tok = onlyToken(tok::kind, RHS, Tokens);                      \
+  [](const GuardParams& P) {                                                   \
+    const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens);                  \
     return cond;                                                               \
   }
#define SYMBOL_GUARD(kind, cond)                                               \
-  [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {      \
-    const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens);     \
+  [](const GuardParams& P) {                                                   \
+    const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, P.RHS, P.Tokens); \
     return cond;                                                               \
   }
   return {
@@ -186,6 +190,11 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
       {(RuleID)Rule::contextual_zero_0numeric_constant,
        TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
 
+      {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+        guardNextTokenNotElse},
+      {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+        guardNextTokenNotElse},
+
       // The grammar distinguishes (only) user-defined vs plain string literals,
       // where the clang lexer distinguishes (only) encoding types.
       {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,

diff  --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index d49fb8fb7cf42..8138d0fd481ed 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -290,7 +290,7 @@ expression-statement := expression_opt ;
 compound-statement := { statement-seq_opt [recover=Brackets] }
 statement-seq := statement
 statement-seq := statement-seq statement
-selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard]
 selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
 selection-statement := SWITCH ( init-statement_opt condition ) statement
 iteration-statement := WHILE ( condition ) statement

diff  --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
new file mode 100644
index 0000000000000..151f3931b53f9
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
@@ -0,0 +1,22 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s
+
+// Verify the else should belong to the nested if statement
+if (true) if (true) {} else {}
+
+// CHECK:      statement-seq~selection-statement := IF ( condition ) statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: └─statement~selection-statement
+// CHECK-NEXT:   ├─IF
+// CHECK-NEXT:   ├─(
+// CHECK-NEXT:   ├─condition~TRUE
+// CHECK-NEXT:   ├─)
+// CHECK-NEXT:   ├─statement~compound-statement := { }
+// CHECK-NEXT:   │ ├─{
+// CHECK-NEXT:   │ └─}
+// CHECK-NEXT:   ├─ELSE
+// CHECK-NEXT:   └─statement~compound-statement := { }
+// CHECK-NEXT:     ├─{
+// CHECK-NEXT:     └─}

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 2c3ef265de392..5f87efec67044 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -634,11 +634,12 @@ TEST_F(GLRTest, GuardExtension) {
     start := IDENTIFIER [guard]
   )bnf");
   TestLang.Guards.try_emplace(
-      ruleFor("start"),
-      [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
-        assert(RHS.size() == 1 &&
-               RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
-        return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+      ruleFor("start"), [&](const GuardParams &P) {
+        assert(P.RHS.size() == 1 &&
+               P.RHS.front()->symbol() ==
+                   tokenSymbol(clang::tok::identifier));
+        return P.Tokens.tokens()[P.RHS.front()->startTokenIndex()]
+                   .text() == "test";
       });
   clang::LangOptions LOptions;
   TestLang.Table = LRTable::buildSLR(TestLang.G);


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to