sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added subscribers: usaxena95, kadircet.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999, ilya-biryukov.
Herald added a project: clang-tools-extra.

Motivating case: `foo bar;` is not a declaration of nothing with `foo` and `bar`
both types.

This is a common and critical ambiguity, clangd/AST.cpp has 20% fewer
ambiguous nodes (1674->1332) after this change.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130337

Files:
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
  clang-tools-extra/pseudo/test/fuzzer.cpp

Index: clang-tools-extra/pseudo/test/fuzzer.cpp
===================================================================
--- clang-tools-extra/pseudo/test/fuzzer.cpp
+++ clang-tools-extra/pseudo/test/fuzzer.cpp
@@ -1,4 +1,4 @@
 // RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s
 int x;
 // CHECK: translation-unit := declaration-seq
-// CHECK: simple-type-specifier := INT
+// CHECK: builtin-type := INT
Index: clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
@@ -0,0 +1,27 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+
+// not parsed as Type{foo} Type{bar}
+foo bar;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+// CHECK:    simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK:    ├─decl-specifier-seq~simple-type-specifier
+// CHECK:    ├─init-declarator-list~IDENTIFIER
+// CHECK:    └─;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+
+// not parsed as Type{std} Type{::string} Declarator{s};
+std::string s;
+// CHECK-NOT: nested-name-specifier := ::
+// CHECK:     simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK:     ├─decl-specifier-seq~simple-type-specifier := <ambiguous>
+// CHECK:     │ ├─simple-type-specifier := nested-name-specifier type-name
+// CHECK:     │ │ ├─nested-name-specifier := <ambiguous> #1
+// CHECK:     │ │ │ ├─nested-name-specifier := type-name ::
+// CHECK:     │ │ │ └─nested-name-specifier := namespace-name ::
+// CHECK:     │ │ └─type-name
+// CHECK:     │ └─simple-type-specifier := nested-name-specifier template-name
+// CHECK:     │   ├─nested-name-specifier =#1
+// CHECK:     │   └─template-name~IDENTIFIER
+// CHECK:     ├─init-declarator-list~IDENTIFIER
+// CHECK:     └─;
+// CHECK-NOT: nested-name-specifier := ::
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -350,7 +350,7 @@
 decl-specifier := CONSTINIT
 decl-specifier := INLINE
 decl-specifier-seq := decl-specifier
-decl-specifier-seq := decl-specifier decl-specifier-seq
+decl-specifier-seq := decl-specifier decl-specifier-seq [guard]
 storage-class-specifier := STATIC
 storage-class-specifier := THREAD_LOCAL
 storage-class-specifier := EXTERN
@@ -364,31 +364,32 @@
 type-specifier := typename-specifier
 type-specifier := cv-qualifier
 type-specifier-seq := type-specifier
-type-specifier-seq := type-specifier type-specifier-seq
+type-specifier-seq := type-specifier type-specifier-seq [guard]
 defining-type-specifier := type-specifier
 defining-type-specifier := class-specifier
 defining-type-specifier := enum-specifier
 defining-type-specifier-seq := defining-type-specifier
-defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq
+defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq [guard]
 simple-type-specifier := nested-name-specifier_opt type-name
 simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id
 simple-type-specifier := decltype-specifier
 simple-type-specifier := placeholder-type-specifier
 simple-type-specifier := nested-name-specifier_opt template-name
-simple-type-specifier := CHAR
-simple-type-specifier := CHAR8_T
-simple-type-specifier := CHAR16_T
-simple-type-specifier := CHAR32_T
-simple-type-specifier := WCHAR_T
-simple-type-specifier := BOOL
+simple-type-specifier := builtin-type
+builtin-type := CHAR
+builtin-type := CHAR8_T
+builtin-type := CHAR16_T
+builtin-type := CHAR32_T
+builtin-type := WCHAR_T
+builtin-type := BOOL
 simple-type-specifier := SHORT
-simple-type-specifier := INT
+builtin-type := INT
 simple-type-specifier := LONG
 simple-type-specifier := SIGNED
 simple-type-specifier := UNSIGNED
-simple-type-specifier := FLOAT
-simple-type-specifier := DOUBLE
-simple-type-specifier := VOID
+builtin-type := FLOAT
+builtin-type := DOUBLE
+builtin-type := VOID
 type-name := class-name
 type-name := enum-name
 type-name := typedef-name
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -14,7 +14,9 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Debug.h"
 #include <utility>
+#define DEBUG_TYPE "CXX.cpp"
 
 namespace clang {
 namespace pseudo {
@@ -160,7 +162,110 @@
   return symbolToToken(P.Lookahead) != tok::kw_else;
 }
 
+// Whether this e.g. decl-specifier contains an "exclusive" type such as a class
+// name, and thus can't combine with a second exclusive type.
+//
+// Returns false for
+//  - non-types
+//  - "unsigned" etc that may suffice as types but may modify others
+//  - cases of uncertainty (e.g. due to ambiguity)
+bool hasExclusiveType(const ForestNode *N) {
+  // FIXME: every time we apply this check, we walk the whole subtree.
+  // Add per-node caching instead.
+  while (true) {
+    assert(N->symbol() == (SymbolID)Symbol::decl_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::type_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::defining_type_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::decl_specifier ||
+           N->symbol() == (SymbolID)Symbol::type_specifier ||
+           N->symbol() == (SymbolID)Symbol::defining_type_specifier ||
+           N->symbol() == (SymbolID)Symbol::simple_type_specifier);
+    if (N->kind() == ForestNode::Opaque)
+      return false; // conservatively.
+    if (N->kind() == ForestNode::Ambiguous) {
+      // Conservatively, this has a type if all options do.
+      for (const auto* Alt : N->alternatives())
+        if (!hasExclusiveType(Alt))
+          return false;
+      return true;
+    }
+    // All supported symbols are nonterminals.
+    assert(N->kind() == ForestNode::Sequence);
+    switch (N->rule()) {
+      // seq := element seq: check element then continue into seq
+      case (RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq:
+      case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq:
+      case (RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq:
+        if (hasExclusiveType(N->children().front()))
+          return true;
+        N = N->children().back();
+        continue;
+      // seq := element: continue into element
+      case (RuleID)Rule::decl_specifier_seq_0decl_specifier:
+      case (RuleID)Rule::type_specifier_seq_0type_specifier:
+      case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier:
+        N = N->children().front();
+        continue;
+
+      // defining-type-specifier
+      case (RuleID)Rule::defining_type_specifier_0type_specifier:
+        N = N->children().front();
+        continue;
+      case (RuleID)Rule::defining_type_specifier_0class_specifier:
+      case (RuleID)Rule::defining_type_specifier_0enum_specifier:
+        return true;
+
+      // decl-specifier
+      case (RuleID)Rule::decl_specifier_0defining_type_specifier:
+        N = N->children().front();
+        continue;
+      case (RuleID)Rule::decl_specifier_0consteval:
+      case (RuleID)Rule::decl_specifier_0constexpr:
+      case (RuleID)Rule::decl_specifier_0constinit:
+      case (RuleID)Rule::decl_specifier_0inline:
+      case (RuleID)Rule::decl_specifier_0friend:
+      case (RuleID)Rule::decl_specifier_0storage_class_specifier:
+      case (RuleID)Rule::decl_specifier_0typedef:
+      case (RuleID)Rule::decl_specifier_0function_specifier:
+        return false;
+
+      // type-specifier
+      case (RuleID)Rule::type_specifier_0elaborated_type_specifier:
+      case (RuleID)Rule::type_specifier_0typename_specifier:
+      case (RuleID)Rule::type_specifier_0cv_qualifier:
+        return false;
+      case (RuleID)Rule::type_specifier_0simple_type_specifier:
+        N = N->children().front();
+        continue;
+
+      // simple-type-specifier
+      case (RuleID)Rule::simple_type_specifier_0type_name:
+      case (RuleID)Rule::simple_type_specifier_0template_name:
+      case (RuleID)Rule::simple_type_specifier_0builtin_type:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_2simple_template_id:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_name:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1type_name:
+      case (RuleID)Rule::simple_type_specifier_0decltype_specifier:
+      case (RuleID)Rule::simple_type_specifier_0placeholder_type_specifier:
+        return true;
+      case (RuleID)Rule::simple_type_specifier_0long:
+      case (RuleID)Rule::simple_type_specifier_0short:
+      case (RuleID)Rule::simple_type_specifier_0signed:
+      case (RuleID)Rule::simple_type_specifier_0unsigned:
+        return false;
+
+      default:
+        LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n");
+        llvm_unreachable("hasExclusiveType be exhaustive!");
+    }
+  }
+}
+
 llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+#define GUARD(cond)                                                            \
+  {                                                                            \
+    [](const GuardParams &P) { return cond; }                                  \
+  }
 #define TOKEN_GUARD(kind, cond)                                                \
   [](const GuardParams& P) {                                                   \
     const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens);                  \
@@ -177,6 +282,19 @@
       {(RuleID)Rule::non_function_declarator_0declarator,
        SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))},
 
+      // A {decl,type,defining-type}-specifier-sequence cannot have multiple
+      // "exclusive" types (like class names): a value has only one type.
+      {(RuleID)Rule::
+           defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS.front()) ||
+             !hasExclusiveType(P.RHS.back()))},
+      {(RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS.front()) ||
+             !hasExclusiveType(P.RHS.back()))},
+      {(RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS.front()) ||
+             !hasExclusiveType(P.RHS.back()))},
+
       {(RuleID)Rule::contextual_override_0identifier,
        TOKEN_GUARD(identifier, Tok.text() == "override")},
       {(RuleID)Rule::contextual_final_0identifier,
@@ -190,10 +308,12 @@
       {(RuleID)Rule::contextual_zero_0numeric_constant,
        TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
 
-      {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
-        guardNextTokenNotElse},
-      {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
-        guardNextTokenNotElse},
+      {(RuleID)Rule::
+           selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+       guardNextTokenNotElse},
+      {(RuleID)Rule::
+           selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+       guardNextTokenNotElse},
 
       // The grammar distinguishes (only) user-defined vs plain string literals,
       // where the clang lexer distinguishes (only) encoding types.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to