ioeric created this revision.
ioeric added a reviewer: sammccall.
Herald added subscribers: cfe-commits, jkorous-apple, ilya-biryukov, klimek.

Currently, we pick the first declaration of a symbol in a TU, which is 
considered
canonical in the clangIndex, as the canonical declaration in clangd. This causes
forward declarations that might appear in a random header to be used as a
canonical declaration, which is not desirable for features like 
go-to-declaration
or include insertion.

For example, for class X, we would consider the forward declaration in fwd.h to
be the canonical declaration, while the preferred canonical declaration should
be the actual definition in x.h.

  // fwd.h
  class X;  // forward decl
  
  // x.h
  class X {};

This patch fixes the issue by making symbol collector favor the actual 
definition of
a TagDecl (i.e. class/struct/enum/union) found in a header file over the first 
seen
declarations in a TU. Other symbol types like functions are not handled because
using the first seen declarations as canonical declarations is usually a good
heuristic for them.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D43823

Files:
  clangd/index/FileIndex.cpp
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -47,6 +47,7 @@
 }
 MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
 MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; }
+MATCHER_P(DefURI, P, "") { return arg.Definition.FileURI == P; }
 MATCHER_P(IncludeHeader, P, "") {
   return arg.Detail && arg.Detail->IncludeHeader == P;
 }
@@ -152,17 +153,15 @@
 };
 
 TEST_F(SymbolCollectorTest, CollectSymbols) {
-  CollectorOpts.IndexMainFiles = true;
   const std::string Header = R"(
     class Foo {
       void f();
     };
     void f1();
     inline void f2() {}
     static const int KInt = 2;
     const char* kStr = "123";
-  )";
-  const std::string Main = R"(
+
     namespace {
     void ff() {} // ignore
     }
@@ -190,7 +189,7 @@
     using bar::v2;
     } // namespace foo
   )";
-  runSymbolCollector(Header, Main);
+  runSymbolCollector(Header, /*Main=*/"");
   EXPECT_THAT(Symbols,
               UnorderedElementsAreArray(
                   {QName("Foo"), QName("f1"), QName("f2"), QName("KInt"),
@@ -200,7 +199,6 @@
 }
 
 TEST_F(SymbolCollectorTest, Locations) {
-  CollectorOpts.IndexMainFiles = true;
   Annotations Header(R"cpp(
     // Declared in header, defined in main.
     extern int $xdecl[[X]];
@@ -214,34 +212,28 @@
     int $xdef[[X]] = 42;
     class $clsdef[[Cls]] {};
     void $printdef[[print]]() {}
-
-    // Declared/defined in main only.
-    int $y[[Y]];
   )cpp");
   runSymbolCollector(Header.code(), Main.code());
   EXPECT_THAT(
       Symbols,
       UnorderedElementsAre(
           AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")),
                 DefRange(Main.offsetRange("xdef"))),
-          AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")),
+          AllOf(QName("Cls"), DeclURI(TestHeaderURI),
+                DeclRange(Header.offsetRange("clsdecl")),
                 DefRange(Main.offsetRange("clsdef"))),
           AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")),
                 DefRange(Main.offsetRange("printdef"))),
-          AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))),
-          AllOf(QName("Y"), DeclRange(Main.offsetRange("y")),
-                DefRange(Main.offsetRange("y")))));
+          AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl")))));
 }
 
 TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) {
-  CollectorOpts.IndexMainFiles = false;
   runSymbolCollector("class Foo {};", /*Main=*/"");
   EXPECT_THAT(Symbols,
               UnorderedElementsAre(AllOf(QName("Foo"), DeclURI(TestHeaderURI))));
 }
 
 TEST_F(SymbolCollectorTest, SymbolRelativeWithFallback) {
-  CollectorOpts.IndexMainFiles = false;
   TestHeaderName = "x.h";
   TestFileName = "x.cpp";
   TestHeaderURI = URI::createFile(testPath(TestHeaderName)).toString();
@@ -253,7 +245,6 @@
 
 #ifndef LLVM_ON_WIN32
 TEST_F(SymbolCollectorTest, CustomURIScheme) {
-  CollectorOpts.IndexMainFiles = false;
   // Use test URI scheme from URITests.cpp
   CollectorOpts.URISchemes.insert(CollectorOpts.URISchemes.begin(), "unittest");
   TestHeaderName = testPath("test-root/x.h");
@@ -265,24 +256,21 @@
 #endif
 
 TEST_F(SymbolCollectorTest, InvalidURIScheme) {
-  CollectorOpts.IndexMainFiles = false;
   // Use test URI scheme from URITests.cpp
   CollectorOpts.URISchemes = {"invalid"};
   runSymbolCollector("class Foo {};", /*Main=*/"");
   EXPECT_THAT(Symbols, UnorderedElementsAre(AllOf(QName("Foo"), DeclURI(""))));
 }
 
 TEST_F(SymbolCollectorTest, FallbackToFileURI) {
-  CollectorOpts.IndexMainFiles = false;
   // Use test URI scheme from URITests.cpp
   CollectorOpts.URISchemes = {"invalid", "file"};
   runSymbolCollector("class Foo {};", /*Main=*/"");
   EXPECT_THAT(Symbols, UnorderedElementsAre(
                            AllOf(QName("Foo"), DeclURI(TestHeaderURI))));
 }
 
 TEST_F(SymbolCollectorTest, IncludeEnums) {
-  CollectorOpts.IndexMainFiles = false;
   const std::string Header = R"(
     enum {
       Red
@@ -306,7 +294,6 @@
 }
 
 TEST_F(SymbolCollectorTest, IgnoreNamelessSymbols) {
-  CollectorOpts.IndexMainFiles = false;
   const std::string Header = R"(
     struct {
       int a;
@@ -318,7 +305,6 @@
 }
 
 TEST_F(SymbolCollectorTest, SymbolFormedFromMacro) {
-  CollectorOpts.IndexMainFiles = false;
 
   Annotations Header(R"(
     #define FF(name) \
@@ -342,33 +328,7 @@
                 DeclURI(TestHeaderURI))));
 }
 
-TEST_F(SymbolCollectorTest, SymbolFormedFromMacroInMainFile) {
-  CollectorOpts.IndexMainFiles = true;
-
-  Annotations Main(R"(
-    #define FF(name) \
-      class name##_Test {};
-
-    $expansion[[FF]](abc);
-
-    #define FF2() \
-      class $spelling[[Test]] {};
-
-    FF2();
-  )");
-  runSymbolCollector(/*Header=*/"", Main.code());
-  EXPECT_THAT(
-      Symbols,
-      UnorderedElementsAre(
-          AllOf(QName("abc_Test"), DeclRange(Main.offsetRange("expansion")),
-                DeclURI(TestFileURI)),
-          AllOf(QName("Test"), DeclRange(Main.offsetRange("spelling")),
-                DeclURI(TestFileURI))));
-}
-
 TEST_F(SymbolCollectorTest, SymbolFormedByCLI) {
-  CollectorOpts.IndexMainFiles = false;
-
   Annotations Header(R"(
     #ifdef NAME
     class $expansion[[NAME]] {};
@@ -384,7 +344,6 @@
 }
 
 TEST_F(SymbolCollectorTest, IgnoreSymbolsInMainFile) {
-  CollectorOpts.IndexMainFiles = false;
   const std::string Header = R"(
     class Foo {};
     void f1();
@@ -402,25 +361,6 @@
               UnorderedElementsAre(QName("Foo"), QName("f1"), QName("f2")));
 }
 
-TEST_F(SymbolCollectorTest, IncludeSymbolsInMainFile) {
-  CollectorOpts.IndexMainFiles = true;
-  const std::string Header = R"(
-    class Foo {};
-    void f1();
-    inline void f2() {}
-  )";
-  const std::string Main = R"(
-    namespace {
-    void ff() {} // ignore
-    }
-    void main_f() {}
-    void f1() {}
-  )";
-  runSymbolCollector(Header, Main);
-  EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("f1"),
-                                            QName("f2"), QName("main_f")));
-}
-
 TEST_F(SymbolCollectorTest, IgnoreClassMembers) {
   const std::string Header = R"(
     class Foo {
@@ -620,6 +560,44 @@
                                  IncludeHeader("\"the/good/header.h\""))));
 }
 
+TEST_F(SymbolCollectorTest, AvoidUsingFwdDeclsAsCanonicalDecls) {
+  CollectorOpts.CollectIncludePath = true;
+  Annotations Header(R"(
+    // Forward declarations of TagDecls.
+    class C;
+    struct S;
+    union U;
+
+    // Canonical declarations.
+    class $cdecl[[C]] {};
+    struct $sdecl[[S]] {};
+    union $udecl[[U]] {int x; bool y;};
+  )");
+  runSymbolCollector(Header.code(), /*Main=*/"");
+  EXPECT_THAT(Symbols,
+              UnorderedElementsAre(
+                  AllOf(QName("C"), DeclURI(TestHeaderURI),
+                        DeclRange(Header.offsetRange("cdecl")),
+                        IncludeHeader(TestHeaderURI), DefURI(TestHeaderURI),
+                        DefRange(Header.offsetRange("cdecl"))),
+                  AllOf(QName("S"), DeclURI(TestHeaderURI),
+                        DeclRange(Header.offsetRange("sdecl")),
+                        IncludeHeader(TestHeaderURI), DefURI(TestHeaderURI),
+                        DefRange(Header.offsetRange("sdecl"))),
+                  AllOf(QName("U"), DeclURI(TestHeaderURI),
+                        DeclRange(Header.offsetRange("udecl")),
+                        IncludeHeader(TestHeaderURI), DefURI(TestHeaderURI),
+                        DefRange(Header.offsetRange("udecl")))));
+}
+
+TEST_F(SymbolCollectorTest, ClassForwardDeclarationIsCanonical) {
+  CollectorOpts.CollectIncludePath = true;
+  runSymbolCollector(/*Header=*/"class X;", /*Main=*/"class X {};");
+  EXPECT_THAT(Symbols, UnorderedElementsAre(AllOf(
+                           QName("X"), DeclURI(TestHeaderURI),
+                           IncludeHeader(TestHeaderURI), DefURI(TestFileURI))));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -20,17 +20,18 @@
 
 /// \brief Collect top-level symbols from an AST. These are symbols defined
 /// immediately inside a namespace or a translation unit scope. For example,
-/// symbols in classes or functions are not collected.
+/// symbols in classes or functions are not collected. Note that this only
+/// collects symbols that declared in at least one file that is not a main
+/// file (i.e. the source file corresponding to a TU). These are symbols that
+/// can be imported by other files by including the file where symbols are
+/// declared.
 ///
 /// Clients (e.g. clangd) can use SymbolCollector together with
 /// index::indexTopLevelDecls to retrieve all symbols when the source file is
 /// changed.
 class SymbolCollector : public index::IndexDataConsumer {
 public:
   struct Options {
-    /// Whether to collect symbols in main files (e.g. the source file
-    /// corresponding to a TU).
-    bool IndexMainFiles = false;
     /// When symbol paths cannot be resolved to absolute paths (e.g. files in
     /// VFS that does not have absolute path), combine the fallback directory
     /// with symbols' paths to get absolute paths. This must be an absolute
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -115,9 +115,7 @@
   //   * enum constants in unscoped enum decl (e.g. "red" in "enum {red};")
   auto InTopLevelScope = hasDeclContext(
       anyOf(namespaceDecl(), translationUnitDecl(), linkageSpecDecl()));
-  if (match(decl(allOf(Opts.IndexMainFiles
-                           ? decl()
-                           : decl(unless(isExpansionInMainFile())),
+  if (match(decl(allOf(unless(isExpansionInMainFile()),
                        anyOf(InTopLevelScope,
                              hasDeclContext(enumDecl(InTopLevelScope,
                                                      unless(isScoped())))))),
@@ -209,6 +207,19 @@
   return std::move(Result);
 }
 
+// Checks whether \p ND is a definition of a TagDecl (class/struct/enum/union)
+// in a header file, in which case clangd would prefer to use ND as a canonical
+// declaration.
+// FIXME: handle symbol types that are not TagDecl (e.g. functions), if using
+// the the first seen declaration as canonical declaration is not a good enough
+// heuristic.
+bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
+  using namespace clang::ast_matchers;
+  return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
+         llvm::isa<TagDecl>(&ND) &&
+         match(decl(isExpansionInMainFile()), ND, ND.getASTContext()).empty();
+}
+
 } // namespace
 
 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
@@ -241,12 +252,16 @@
     if (index::generateUSRForDecl(ND, USR))
       return true;
 
+    const NamedDecl &OriginalDecl = *cast<NamedDecl>(ASTNode.OrigD);
     auto ID = SymbolID(USR);
     const Symbol* BasicSymbol = Symbols.find(ID);
     if (!BasicSymbol) // Regardless of role, ND is the canonical declaration.
       BasicSymbol = addDeclaration(*ND, std::move(ID));
+    else if (isPreferredDeclaration(OriginalDecl, Roles))
+      BasicSymbol = addDeclaration(OriginalDecl, std::move(ID));
+
     if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
-      addDefinition(*cast<NamedDecl>(ASTNode.OrigD), *BasicSymbol);
+      addDefinition(OriginalDecl, *BasicSymbol);
   }
   return true;
 }
Index: clangd/index/FileIndex.cpp
===================================================================
--- clangd/index/FileIndex.cpp
+++ clangd/index/FileIndex.cpp
@@ -23,8 +23,6 @@
   // Although we do not index symbols in main files (e.g. cpp file), information
   // in main files like definition locations of class declarations will still be
   // collected; thus, the index works for go-to-definition.
-  // FIXME(ioeric): get rid of `IndexMainFiles` as this is always set to false.
-  CollectorOpts.IndexMainFiles = false;
   // FIXME(ioeric): we might also want to collect include headers. We would need
   // to make sure all includes are canonicalized (with CanonicalIncludes), which
   // is not trivial given the current way of collecting symbols: we only have
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to