sammccall updated this revision to Diff 133024.
sammccall added a comment.

Add tests for SymbolCollector (finding def/decl locations) and merge.

Found some bugs in SymbolCollector's locations - added fixmes.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D42942

Files:
  clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/Merge.cpp
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  clangd/index/SymbolYAML.cpp
  clangd/index/SymbolYAML.h
  clangd/tool/ClangdMain.cpp
  unittests/clangd/IndexTests.cpp
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -46,13 +46,19 @@
   return arg.CompletionSnippetInsertText == S;
 }
 MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
+MATCHER_P(CPath, P, "") { return arg.CanonicalDeclaration.FilePath == P; }
 MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; }
-MATCHER_P(LocationOffsets, Offsets, "") {
+MATCHER_P(DeclRange, Offsets, "") {
   // Offset range in SymbolLocation is [start, end] while in Clangd is [start,
   // end).
+  // FIXME: SymbolLocation should be [start, end).
   return arg.CanonicalDeclaration.StartOffset == Offsets.first &&
       arg.CanonicalDeclaration.EndOffset == Offsets.second - 1;
 }
+MATCHER_P(DefRange, Offsets, "") {
+  return arg.Definition.StartOffset == Offsets.first &&
+         arg.Definition.EndOffset == Offsets.second - 1;
+}
 
 namespace clang {
 namespace clangd {
@@ -96,24 +102,22 @@
 
     auto Factory = llvm::make_unique<SymbolIndexActionFactory>(CollectorOpts);
 
-    std::vector<std::string> Args = {"symbol_collector", "-fsyntax-only",
-                                     "-std=c++11", TestFileName};
+    std::vector<std::string> Args = {"symbol_collector",
+                                     "-fsyntax-only",
+                                     "-std=c++11",
+                                     "-include",
+                                     llvm::sys::path::filename(TestHeaderName),
+                                     TestFileName};
     Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end());
     tooling::ToolInvocation Invocation(
         Args,
         Factory->create(), Files.get(),
         std::make_shared<PCHContainerOperations>());
 
     InMemoryFileSystem->addFile(TestHeaderName, 0,
                                 llvm::MemoryBuffer::getMemBuffer(HeaderCode));
-
-    std::string Content = MainCode;
-    if (!HeaderCode.empty())
-      Content = (llvm::Twine("#include\"") +
-                 llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content)
-                    .str();
     InMemoryFileSystem->addFile(TestFileName, 0,
-                                llvm::MemoryBuffer::getMemBuffer(Content));
+                                llvm::MemoryBuffer::getMemBuffer(MainCode));
     Invocation.run();
     Symbols = Factory->Collector->takeSymbols();
     return true;
@@ -176,6 +180,42 @@
                    QName("foo::bar::v2"), QName("foo::baz")}));
 }
 
+TEST_F(SymbolCollectorTest, Locations) {
+  // FIXME: the behavior here is bad: chopping tokens, including more than the
+  // ident range, using half-open ranges. See fixmes in getSymbolLocation().
+  CollectorOpts.IndexMainFiles = true;
+  Annotations Header(R"cpp(
+    // Declared in header, defined in main.
+    $xdecl[[extern int X]];
+    $clsdecl[[class C]]ls;
+    $printdecl[[void print()]];
+
+    // Declared in header, defined nowhere.
+    $zdecl[[extern int Z]];
+  )cpp");
+  Annotations Main(R"cpp(
+    $xdef[[int X = 4]]2;
+    $clsdef[[class Cls {}]];
+    $printdef[[void print() {}]]
+
+    // Declared/defined in main only.
+    $y[[int Y]];
+  )cpp");
+  runSymbolCollector(Header.code(), Main.code());
+  EXPECT_THAT(
+      Symbols,
+      UnorderedElementsAre(
+          AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")),
+                DefRange(Main.offsetRange("xdef"))),
+          AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")),
+                DefRange(Main.offsetRange("clsdef"))),
+          AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")),
+                DefRange(Main.offsetRange("printdef"))),
+          AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))),
+          AllOf(QName("Y"), DeclRange(Main.offsetRange("y")),
+                DefRange(Main.offsetRange("y")))));
+}
+
 TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) {
   CollectorOpts.IndexMainFiles = false;
   runSymbolCollector("class Foo {};", /*Main=*/"");
@@ -280,10 +320,9 @@
   EXPECT_THAT(
       Symbols,
       UnorderedElementsAre(
-          AllOf(QName("abc_Test"),
-                LocationOffsets(Header.offsetRange("expansion")),
+          AllOf(QName("abc_Test"), DeclRange(Header.offsetRange("expansion")),
                 DeclURI(TestHeaderURI)),
-          AllOf(QName("Test"), LocationOffsets(Header.offsetRange("spelling")),
+          AllOf(QName("Test"), DeclRange(Header.offsetRange("spelling")),
                 DeclURI(TestHeaderURI))));
 }
 
@@ -302,13 +341,13 @@
     FF2();
   )");
   runSymbolCollector(/*Header=*/"", Main.code());
-  EXPECT_THAT(Symbols, UnorderedElementsAre(
-                           AllOf(QName("abc_Test"),
-                                 LocationOffsets(Main.offsetRange("expansion")),
-                                 DeclURI(TestFileURI)),
-                           AllOf(QName("Test"),
-                                 LocationOffsets(Main.offsetRange("spelling")),
-                                 DeclURI(TestFileURI))));
+  EXPECT_THAT(
+      Symbols,
+      UnorderedElementsAre(
+          AllOf(QName("abc_Test"), DeclRange(Main.offsetRange("expansion")),
+                DeclURI(TestFileURI)),
+          AllOf(QName("Test"), DeclRange(Main.offsetRange("spelling")),
+                DeclURI(TestFileURI))));
 }
 
 TEST_F(SymbolCollectorTest, SymbolFormedByCLI) {
@@ -322,10 +361,10 @@
 
   runSymbolCollector(Header.code(), /*Main=*/"",
                      /*ExtraArgs=*/{"-DNAME=name"});
-  EXPECT_THAT(Symbols, UnorderedElementsAre(AllOf(
-                           QName("name"),
-                           LocationOffsets(Header.offsetRange("expansion")),
-                           DeclURI(TestHeaderURI))));
+  EXPECT_THAT(Symbols,
+              UnorderedElementsAre(AllOf(
+                  QName("name"), DeclRange(Header.offsetRange("expansion")),
+                  DeclURI(TestHeaderURI))));
 }
 
 TEST_F(SymbolCollectorTest, IgnoreSymbolsInMainFile) {
@@ -503,19 +542,19 @@
 ...
 )";
 
-  auto Symbols1 = SymbolFromYAML(YAML1);
+  auto Symbols1 = SymbolsFromYAML(YAML1);
   EXPECT_THAT(Symbols1,
               UnorderedElementsAre(AllOf(
                   QName("clang::Foo1"), Labeled("Foo1-label"), Doc("Foo doc"),
                   Detail("int"), DeclURI("file:///path/foo.h"))));
-  auto Symbols2 = SymbolFromYAML(YAML2);
+  auto Symbols2 = SymbolsFromYAML(YAML2);
   EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(
                             QName("clang::Foo2"), Labeled("Foo2-label"),
                             Not(HasDetail()), DeclURI("file:///path/bar.h"))));
 
   std::string ConcatenatedYAML =
       SymbolsToYAML(Symbols1) + SymbolsToYAML(Symbols2);
-  auto ConcatenatedSymbols = SymbolFromYAML(ConcatenatedYAML);
+  auto ConcatenatedSymbols = SymbolsFromYAML(ConcatenatedYAML);
   EXPECT_THAT(ConcatenatedSymbols,
               UnorderedElementsAre(QName("clang::Foo1"),
                                    QName("clang::Foo2")));
Index: unittests/clangd/IndexTests.cpp
===================================================================
--- unittests/clangd/IndexTests.cpp
+++ unittests/clangd/IndexTests.cpp
@@ -248,6 +248,28 @@
   EXPECT_EQ(M.Detail->Documentation, "--doc--");
 }
 
+TEST(MergeTest, PreferSymbolWithDefn) {
+  Symbol L, R;
+  Symbol::Details Scratch;
+
+  L.ID = R.ID = SymbolID("hello");
+  L.CanonicalDeclaration.FileURI = "file:/left.h";
+  R.CanonicalDeclaration.FileURI = "file:/right.h";
+  L.CompletionPlainInsertText = "left-insert";
+  R.CompletionPlainInsertText = "right-insert";
+
+  Symbol M = mergeSymbol(L, R, &Scratch);
+  EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/left.h");
+  EXPECT_EQ(M.Definition.FileURI, "");
+  EXPECT_EQ(M.CompletionPlainInsertText, "left-insert");
+
+  R.Definition.FileURI = "file:/right.cpp"; // Now right will be favored.
+  M = mergeSymbol(L, R, &Scratch);
+  EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/right.h");
+  EXPECT_EQ(M.Definition.FileURI, "file:/right.cpp");
+  EXPECT_EQ(M.CompletionPlainInsertText, "right-insert");
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -37,7 +37,7 @@
     llvm::errs() << "Can't open " << YamlSymbolFile << "\n";
     return nullptr;
   }
-  auto Slab = SymbolFromYAML(Buffer.get()->getBuffer());
+  auto Slab = SymbolsFromYAML(Buffer.get()->getBuffer());
   SymbolSlab::Builder SymsBuilder;
   for (auto Sym : Slab)
     SymsBuilder.insert(Sym);
Index: clangd/index/SymbolYAML.h
===================================================================
--- clangd/index/SymbolYAML.h
+++ clangd/index/SymbolYAML.h
@@ -25,7 +25,11 @@
 namespace clangd {
 
 // Read symbols from a YAML-format string.
-SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent);
+SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent);
+
+// Read one symbol from a YAML-format string, backed by the arena.
+Symbol SymbolFromYAML(llvm::StringRef YAMLContent,
+                      llvm::BumpPtrAllocator &Arena);
 
 // Convert a single symbol to YAML-format string.
 // The YAML result is safe to concatenate.
Index: clangd/index/SymbolYAML.cpp
===================================================================
--- clangd/index/SymbolYAML.cpp
+++ clangd/index/SymbolYAML.cpp
@@ -97,7 +97,9 @@
     IO.mapRequired("Name", Sym.Name);
     IO.mapRequired("Scope", Sym.Scope);
     IO.mapRequired("SymInfo", Sym.SymInfo);
-    IO.mapRequired("CanonicalDeclaration", Sym.CanonicalDeclaration);
+    IO.mapOptional("CanonicalDeclaration", Sym.CanonicalDeclaration,
+                   SymbolLocation());
+    IO.mapOptional("Definition", Sym.Definition, SymbolLocation());
     IO.mapRequired("CompletionLabel", Sym.CompletionLabel);
     IO.mapRequired("CompletionFilterText", Sym.CompletionFilterText);
     IO.mapRequired("CompletionPlainInsertText", Sym.CompletionPlainInsertText);
@@ -160,7 +162,7 @@
 namespace clang {
 namespace clangd {
 
-SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent) {
+SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent) {
   // Store data of pointer fields (excl. `StringRef`) like `Detail`.
   llvm::BumpPtrAllocator Arena;
   llvm::yaml::Input Yin(YAMLContent, &Arena);
@@ -173,6 +175,14 @@
   return std::move(Syms).build();
 }
 
+Symbol SymbolFromYAML(llvm::StringRef YAMLContent,
+                      llvm::BumpPtrAllocator &Arena) {
+  llvm::yaml::Input Yin(YAMLContent, &Arena);
+  Symbol S;
+  Yin >> S;
+  return S;
+}
+
 std::string SymbolsToYAML(const SymbolSlab& Symbols) {
   std::string Str;
   llvm::raw_string_ostream OS(Str);
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -58,6 +58,9 @@
   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
 
 private:
+  const Symbol *addDeclaration(const NamedDecl &, SymbolID);
+  void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
+
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
   ASTContext *ASTCtx;
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -132,13 +132,19 @@
 // For symbols defined inside macros:
 //   * use expansion location, if the symbol is formed via macro concatenation.
 //   * use spelling location, otherwise.
+//
+// FIXME: EndOffset is inclusive (closed range), and should be exclusive.
+// FIXME: Because the underlying ranges are token ranges, this code chops the
+//        last token in half if it contains multiple characters.
+// FIXME: We probably want to get just the location of the symbol name, not
+//        the whole e.g. class.
 llvm::Optional<SymbolLocation>
-getSymbolLocation(const NamedDecl *D, SourceManager &SM,
+getSymbolLocation(const NamedDecl &D, SourceManager &SM,
                   const SymbolCollector::Options &Opts,
                   std::string &FileURIStorage) {
-  SourceLocation Loc = D->getLocation();
-  SourceLocation StartLoc = SM.getSpellingLoc(D->getLocStart());
-  SourceLocation EndLoc = SM.getSpellingLoc(D->getLocEnd());
+  SourceLocation Loc = D.getLocation();
+  SourceLocation StartLoc = SM.getSpellingLoc(D.getLocStart());
+  SourceLocation EndLoc = SM.getSpellingLoc(D.getLocEnd());
   if (Loc.isMacroID()) {
     std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM);
     if (llvm::StringRef(PrintLoc).startswith("<scratch") ||
@@ -149,17 +155,20 @@
       //     be "<scratch space>"
       //   * symbols controlled and defined by a compile command-line option
       //     `-DName=foo`, the spelling location will be "<command line>".
-      StartLoc = SM.getExpansionRange(D->getLocStart()).first;
-      EndLoc = SM.getExpansionRange(D->getLocEnd()).second;
+      StartLoc = SM.getExpansionRange(D.getLocStart()).first;
+      EndLoc = SM.getExpansionRange(D.getLocEnd()).second;
     }
   }
 
   auto U = toURI(SM, SM.getFilename(StartLoc), Opts);
   if (!U)
     return llvm::None;
   FileURIStorage = std::move(*U);
-  return SymbolLocation{FileURIStorage, SM.getFileOffset(StartLoc),
-                        SM.getFileOffset(EndLoc)};
+  SymbolLocation Result;
+  Result.FileURI = FileURIStorage;
+  Result.StartOffset = SM.getFileOffset(StartLoc);
+  Result.EndOffset = SM.getFileOffset(EndLoc);
+  return std::move(Result);
 }
 
 } // namespace
@@ -195,62 +204,85 @@
       return true;
 
     auto ID = SymbolID(USR);
-    if (Symbols.find(ID) != nullptr)
-      return true;
+    const Symbol* BasicSymbol = Symbols.find(ID);
+    if (!BasicSymbol) // Regardless of role, ND is the canonical declaration.
+      BasicSymbol = addDeclaration(*ND, std::move(ID));
+    if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
+      addDefinition(*cast<NamedDecl>(ASTNode.OrigD), *BasicSymbol);
+  }
+  return true;
+}
 
-    auto &SM = ND->getASTContext().getSourceManager();
+const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND,
+                                              SymbolID ID) {
+  auto &SM = ND.getASTContext().getSourceManager();
 
-    std::string QName;
-    llvm::raw_string_ostream OS(QName);
-    PrintingPolicy Policy(ASTCtx->getLangOpts());
-    // Note that inline namespaces are treated as transparent scopes. This
-    // reflects the way they're most commonly used for lookup. Ideally we'd
-    // include them, but at query time it's hard to find all the inline
-    // namespaces to query: the preamble doesn't have a dedicated list.
-    Policy.SuppressUnwrittenScope = true;
-    ND->printQualifiedName(OS, Policy);
-    OS.flush();
+  std::string QName;
+  llvm::raw_string_ostream OS(QName);
+  PrintingPolicy Policy(ASTCtx->getLangOpts());
+  // Note that inline namespaces are treated as transparent scopes. This
+  // reflects the way they're most commonly used for lookup. Ideally we'd
+  // include them, but at query time it's hard to find all the inline
+  // namespaces to query: the preamble doesn't have a dedicated list.
+  Policy.SuppressUnwrittenScope = true;
+  ND.printQualifiedName(OS, Policy);
+  OS.flush();
 
-    Symbol S;
-    S.ID = std::move(ID);
-    std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
-    S.SymInfo = index::getSymbolInfo(D);
-    std::string URIStorage;
-    if (auto DeclLoc = getSymbolLocation(ND, SM, Opts, URIStorage))
-      S.CanonicalDeclaration = *DeclLoc;
+  Symbol S;
+  S.ID = std::move(ID);
+  std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
+  S.SymInfo = index::getSymbolInfo(&ND);
+  std::string FileURI;
+  // FIXME: we may want a different "canonical" heuristic than clang chooses.
+  if (auto DeclLoc = getSymbolLocation(ND, SM, Opts, FileURI))
+    S.CanonicalDeclaration = *DeclLoc;
 
-    // Add completion info.
-    assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
-    CodeCompletionResult SymbolCompletion(ND, 0);
-    const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
-        *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator,
-        *CompletionTUInfo,
-        /*IncludeBriefComments*/ true);
-    std::string Label;
-    std::string SnippetInsertText;
-    std::string IgnoredLabel;
-    std::string PlainInsertText;
-    getLabelAndInsertText(*CCS, &Label, &SnippetInsertText,
-                          /*EnableSnippets=*/true);
-    getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText,
-                          /*EnableSnippets=*/false);
-    std::string FilterText = getFilterText(*CCS);
-    std::string Documentation = getDocumentation(*CCS);
-    std::string CompletionDetail = getDetail(*CCS);
+  // Add completion info.
+  // FIXME: we may want to choose a different redecl, or combine from several.
+  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
+  CodeCompletionResult SymbolCompletion(&ND, 0);
+  const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
+      *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator,
+      *CompletionTUInfo,
+      /*IncludeBriefComments*/ true);
+  std::string Label;
+  std::string SnippetInsertText;
+  std::string IgnoredLabel;
+  std::string PlainInsertText;
+  getLabelAndInsertText(*CCS, &Label, &SnippetInsertText,
+                        /*EnableSnippets=*/true);
+  getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText,
+                        /*EnableSnippets=*/false);
+  std::string FilterText = getFilterText(*CCS);
+  std::string Documentation = getDocumentation(*CCS);
+  std::string CompletionDetail = getDetail(*CCS);
 
-    S.CompletionFilterText = FilterText;
-    S.CompletionLabel = Label;
-    S.CompletionPlainInsertText = PlainInsertText;
-    S.CompletionSnippetInsertText = SnippetInsertText;
-    Symbol::Details Detail;
-    Detail.Documentation = Documentation;
-    Detail.CompletionDetail = CompletionDetail;
-    S.Detail = &Detail;
+  S.CompletionFilterText = FilterText;
+  S.CompletionLabel = Label;
+  S.CompletionPlainInsertText = PlainInsertText;
+  S.CompletionSnippetInsertText = SnippetInsertText;
+  Symbol::Details Detail;
+  Detail.Documentation = Documentation;
+  Detail.CompletionDetail = CompletionDetail;
+  S.Detail = &Detail;
 
-    Symbols.insert(S);
-  }
+  Symbols.insert(S);
+  return Symbols.find(S.ID);
+}
 
-  return true;
+void SymbolCollector::addDefinition(const NamedDecl &ND,
+                                    const Symbol &DeclSym) {
+  if (DeclSym.Definition)
+    return;
+  // If we saw some forward declaration, we end up copying the symbol.
+  // This is not ideal, but avoids duplicating the "is this a definition" check
+  // in clang::index. We should only see one definition.
+  Symbol S = DeclSym;
+  std::string FileURI;
+  if (auto DefLoc = getSymbolLocation(ND, ND.getASTContext().getSourceManager(),
+                                      Opts, FileURI))
+    S.Definition = *DefLoc;
+  Symbols.insert(S);
 }
 
 } // namespace clangd
Index: clangd/index/Merge.cpp
===================================================================
--- clangd/index/Merge.cpp
+++ clangd/index/Merge.cpp
@@ -60,32 +60,40 @@
 Symbol
 mergeSymbol(const Symbol &L, const Symbol &R, Symbol::Details *Scratch) {
   assert(L.ID == R.ID);
-  Symbol S = L;
-  // For each optional field, fill it from R if missing in L.
-  // (It might be missing in R too, but that's a no-op).
-  if (S.CanonicalDeclaration.FileURI == "")
-    S.CanonicalDeclaration = R.CanonicalDeclaration;
+  // We prefer information from TUs that saw the definition.
+  // Classes: this is the def itself. Functions: hopefully the header decl.
+  // If both did (or both didn't), continue to prefer L over R.
+  bool PreferR = R.Definition && !L.Definition;
+  Symbol S = PreferR ? R : L;        // The target symbol we're merging into.
+  const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.
+
+  // For each optional field, fill it from O if missing in S.
+  // (It might be missing in O too, but that's a no-op).
+  if (!S.Definition)
+    S.Definition = O.Definition;
+  if (!S.CanonicalDeclaration)
+    S.CanonicalDeclaration = O.CanonicalDeclaration;
   if (S.CompletionLabel == "")
-    S.CompletionLabel = R.CompletionLabel;
+    S.CompletionLabel = O.CompletionLabel;
   if (S.CompletionFilterText == "")
-    S.CompletionFilterText = R.CompletionFilterText;
+    S.CompletionFilterText = O.CompletionFilterText;
   if (S.CompletionPlainInsertText == "")
-    S.CompletionPlainInsertText = R.CompletionPlainInsertText;
+    S.CompletionPlainInsertText = O.CompletionPlainInsertText;
   if (S.CompletionSnippetInsertText == "")
-    S.CompletionSnippetInsertText = R.CompletionSnippetInsertText;
+    S.CompletionSnippetInsertText = O.CompletionSnippetInsertText;
 
-  if (L.Detail && R.Detail) {
+  if (S.Detail && O.Detail) {
     // Copy into scratch space so we can merge.
-    *Scratch = *L.Detail;
+    *Scratch = *S.Detail;
     if (Scratch->Documentation == "")
-      Scratch->Documentation = R.Detail->Documentation;
+      Scratch->Documentation = O.Detail->Documentation;
     if (Scratch->CompletionDetail == "")
-      Scratch->CompletionDetail = R.Detail->CompletionDetail;
+      Scratch->CompletionDetail = O.Detail->CompletionDetail;
     S.Detail = Scratch;
   } else if (L.Detail)
-    S.Detail = L.Detail;
-  else if (R.Detail)
-    S.Detail = R.Detail;
+    /* Current state: S.Detail = O.Detail */;
+  else if (O.Detail)
+    S.Detail = O.Detail;
   return S;
 }
 
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -27,11 +27,14 @@
   llvm::StringRef FileURI;
   // The 0-based offset to the first character of the symbol from the beginning
   // of the source file.
-  unsigned StartOffset;
+  unsigned StartOffset = 0;
   // The 0-based offset to the last character of the symbol from the beginning
   // of the source file.
-  unsigned EndOffset;
+  unsigned EndOffset = 0;
+
+  operator bool() const { return !FileURI.empty(); }
 };
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
 
 // The class identifies a particular C++ symbol (class, function, method, etc).
 //
@@ -44,7 +47,7 @@
 class SymbolID {
 public:
   SymbolID() = default;
-  SymbolID(llvm::StringRef USR);
+  explicit SymbolID(llvm::StringRef USR);
 
   bool operator==(const SymbolID &Sym) const {
     return HashValue == Sym.HashValue;
@@ -117,14 +120,17 @@
   llvm::StringRef Name;
   // The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
   llvm::StringRef Scope;
-  // The location of the canonical declaration of the symbol.
+  // The location of the preferred declaration of the symbol.
   //
   // A C++ symbol could have multiple declarations and one definition (e.g.
   // a function is declared in ".h" file, and is defined in ".cc" file).
   //   * For classes, the canonical declaration is usually definition.
   //   * For non-inline functions, the canonical declaration is a declaration
   //     (not a definition), which is usually declared in ".h" file.
   SymbolLocation CanonicalDeclaration;
+  // The location of the symbol's definition, if one was found.
+  // This covers the whole definition (e.g. class body).
+  SymbolLocation Definition;
 
   /// A brief description of the symbol that can be displayed in the completion
   /// candidate list. For example, "Foo(X x, Y y) const" is a labal for a
@@ -160,6 +166,7 @@
   // FIXME: add all occurrences support.
   // FIXME: add extra fields for index scoring signals.
 };
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
 
 // An immutable symbol container that stores a set of symbols.
 // The container will maintain the lifetime of the symbols.
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -10,11 +10,18 @@
 #include "Index.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/SHA1.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 namespace clangd {
 using namespace llvm;
 
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) {
+  if (!L)
+    return OS << "(none)";
+  return OS << L.FileURI << "[" << L.StartOffset << "-" << L.EndOffset << "]";
+}
+
 SymbolID::SymbolID(StringRef USR)
     : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
 
@@ -29,6 +36,10 @@
   std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin());
 }
 
+raw_ostream &operator<<(raw_ostream &OS, const Symbol &S) {
+  return OS << S.Scope << S.Name;
+}
+
 SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
   auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
                              [](const Symbol &S, const SymbolID &I) {
@@ -55,6 +66,7 @@
   Intern(S.Name);
   Intern(S.Scope);
   Intern(S.CanonicalDeclaration.FileURI);
+  Intern(S.Definition.FileURI);
 
   Intern(S.CompletionLabel);
   Intern(S.CompletionFilterText);
Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
===================================================================
--- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
+++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
@@ -14,9 +14,11 @@
 //===---------------------------------------------------------------------===//
 
 #include "index/Index.h"
+#include "index/Merge.h"
 #include "index/SymbolCollector.h"
 #include "index/SymbolYAML.h"
 #include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexingAction.h"
 #include "clang/Tooling/CommonOptionsParser.h"
@@ -34,6 +36,7 @@
 
 namespace clang {
 namespace clangd {
+namespace {
 
 static llvm::cl::opt<std::string> AssumedHeaderDir(
     "assume-header-dir",
@@ -60,6 +63,12 @@
                 index::createIndexingAction(C, Opts, nullptr)),
             Ctx(Ctx), Collector(C) {}
 
+      // XXX this is just to make running the tool fast during dev!
+      bool BeginInvocation(CompilerInstance &CI) override {
+        const auto &Inputs = CI.getInvocation().getFrontendOpts().Inputs;
+        return !Inputs.empty() && Inputs[0].getFile().contains("clangd/index");
+      }
+
       void EndSourceFileAction() override {
         WrapperFrontendAction::EndSourceFileAction();
 
@@ -91,6 +100,25 @@
   tooling::ExecutionContext *Ctx;
 };
 
+// Combine occurrences of the same symbol across translation units.
+SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
+  SymbolSlab::Builder UniqueSymbols;
+  llvm::BumpPtrAllocator Arena;
+  Symbol::Details Scratch;
+  Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
+    Arena.Reset();
+    auto Sym = clang::clangd::SymbolFromYAML(Value, Arena);
+    clang::clangd::SymbolID ID;
+    Key >> ID;
+    if (const auto *Existing = UniqueSymbols.find(ID))
+      UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
+    else
+      UniqueSymbols.insert(Sym);
+  });
+  return std::move(UniqueSymbols).build();
+}
+
+} // namespace
 } // namespace clangd
 } // namespace clang
 
@@ -115,29 +143,20 @@
     return 1;
   }
 
+  // Map phase: emit symbols found in each translation unit.
   auto Err = Executor->get()->execute(
       llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
           Executor->get()->getExecutionContext()));
   if (Err) {
     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
   }
 
-  // Deduplicate the result by key and keep the longest value.
-  // FIXME(ioeric): Merge occurrences, rather than just dropping all but one.
-  // Definitions and forward declarations have the same key and may both have
-  // information. Usage count will need to be aggregated across occurrences,
-  // too.
-  llvm::StringMap<llvm::StringRef> UniqueSymbols;
-  Executor->get()->getToolResults()->forEachResult(
-      [&UniqueSymbols](llvm::StringRef Key, llvm::StringRef Value) {
-        auto Ret = UniqueSymbols.try_emplace(Key, Value);
-        if (!Ret.second) {
-          // If key already exists, keep the longest value.
-          llvm::StringRef &V = Ret.first->second;
-          V = V.size() < Value.size() ? Value : V;
-        }
-      });
+  // Reduce phase: combine symbols using the ID as a key.
+  auto UniqueSymbols =
+      clang::clangd::mergeSymbols(Executor->get()->getToolResults());
+
+  // Output phase: emit YAML for result symbols.
   for (const auto &Sym : UniqueSymbols)
-    llvm::outs() << Sym.second;
+    llvm::outs() << SymbolToYAML(Sym);
   return 0;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to