[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-14 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126919.
ioeric marked 3 inline comments as done.
ioeric added a comment.

- Address more comments. # I am going to land it now.
- Merge remote-tracking branch 'origin/master' into index


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,114 @@
+//===-- IndexTests.cpp  ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+  Symbol Sym;
+  Sym.ID = SymbolID(ID);
+  Sym.QualifiedName = ID;
+  return Sym;
+}
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector Pointers;
+};
+
+// Create a slab of symbols with IDs and names [Begin, End]. The life time of
+// the slab is managed by the returned shared pointer. If \p WeakSymbols is
+// provided, it will be pointed to the managed object in the returned shared
+// pointer.
+std::shared_ptr>
+generateNumSymbols(int Begin, int End,
+   std::weak_ptr *WeakSymbols = nullptr) {
+  auto Slab = std::make_shared();
+  if (WeakSymbols)
+*WeakSymbols = Slab;
+
+  for (int i = Begin; i <= End; i++)
+Slab->Slab.insert(symbol(std::to_string(i)));
+
+  for (const auto &Sym : Slab->Slab)
+Slab->Pointers.push_back(&Sym.second);
+
+  return {std::move(Slab), &Slab->Pointers};
+}
+
+std::vector match(const SymbolIndex &I,
+   const FuzzyFindRequest &Req) {
+  std::vector Matches;
+  auto Ctx = Context::empty();
+  I.fuzzyFind(Ctx, Req,
+  [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+  return Matches;
+}
+
+TEST(MemIndexTest, MemIndexSymbolsRecycled) {
+  MemIndex I;
+  std::weak_ptr Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  EXPECT_FALSE(Symbols.expired());
+  // Release old symbols.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+TEST(MemIndexTest, MemIndexMatchSubstring) {
+  MemIndex I;
+  I.build(generateNumSymbols(5, 25));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  MemIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST(MemIndexTest, MemIndexLimitedNumMatches) {
+  MemIndex I;
+  I.build(generateNumSymbols(0, 100));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
Index: clangd/index/MemIndex.h
===
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include 
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-14 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added inline comments.



Comment at: unittests/clangd/IndexTests.cpp:29
+
+struct SlabAndPointers {
+  SymbolSlab Slab;

this could be local to generateNumSymbols, up to you



Comment at: unittests/clangd/IndexTests.cpp:51
+
+  auto Symbols = std::shared_ptr>(std::move(Slab),
+  &Slab->Pointers);

nit: just return? (you can skip spelling out the shared_ptr type with {}, I 
think)



Comment at: unittests/clangd/IndexTests.cpp:73
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  // Release old symbols.

assert symbols is not expired here?


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-14 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126917.
ioeric marked an inline comment as done.
ioeric added a comment.

- Fix comment for fuzzyFind


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,116 @@
+//===-- IndexTests.cpp  ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+  Symbol Sym;
+  Sym.ID = SymbolID(ID);
+  Sym.QualifiedName = ID;
+  return Sym;
+}
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector Pointers;
+};
+
+// Create a slab of symbols with IDs and names [Begin, End]. The life time of
+// the slab is managed by the returned shared pointer. If \p WeakSymbols is
+// provided, it will be pointed to the managed object in the returned shared
+// pointer.
+std::shared_ptr>
+generateNumSymbols(int Begin, int End,
+   std::weak_ptr *WeakSymbols = nullptr) {
+  auto Slab = std::make_shared();
+  if (WeakSymbols)
+*WeakSymbols = Slab;
+
+  for (int i = Begin; i <= End; i++)
+Slab->Slab.insert(symbol(std::to_string(i)));
+
+  for (const auto &Sym : Slab->Slab)
+Slab->Pointers.push_back(&Sym.second);
+
+  auto Symbols = std::shared_ptr>(std::move(Slab),
+  &Slab->Pointers);
+
+  return Symbols;
+}
+
+std::vector match(const SymbolIndex &I,
+   const FuzzyFindRequest &Req) {
+  std::vector Matches;
+  auto Ctx = Context::empty();
+  I.fuzzyFind(Ctx, Req,
+  [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+  return Matches;
+}
+
+TEST(MemIndexTest, MemIndexSymbolsRecycled) {
+  MemIndex I;
+  std::weak_ptr Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  // Release old symbols.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+TEST(MemIndexTest, MemIndexMatchSubstring) {
+  MemIndex I;
+  I.build(generateNumSymbols(5, 25));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  MemIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST(MemIndexTest, MemIndexLimitedNumMatches) {
+  MemIndex I;
+  I.build(generateNumSymbols(0, 100));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
Index: clangd/index/MemIndex.h
===
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include 
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`. All symbol

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-14 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126916.
ioeric marked 7 inline comments as done.
ioeric added a comment.

- Address review comments


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,116 @@
+//===-- IndexTests.cpp  ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+  Symbol Sym;
+  Sym.ID = SymbolID(ID);
+  Sym.QualifiedName = ID;
+  return Sym;
+}
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector Pointers;
+};
+
+// Create a slab of symbols with IDs and names [Begin, End]. The life time of
+// the slab is managed by the returned shared pointer. If \p WeakSymbols is
+// provided, it will be pointed to the managed object in the returned shared
+// pointer.
+std::shared_ptr>
+generateNumSymbols(int Begin, int End,
+   std::weak_ptr *WeakSymbols = nullptr) {
+  auto Slab = std::make_shared();
+  if (WeakSymbols)
+*WeakSymbols = Slab;
+
+  for (int i = Begin; i <= End; i++)
+Slab->Slab.insert(symbol(std::to_string(i)));
+
+  for (const auto &Sym : Slab->Slab)
+Slab->Pointers.push_back(&Sym.second);
+
+  auto Symbols = std::shared_ptr>(std::move(Slab),
+  &Slab->Pointers);
+
+  return Symbols;
+}
+
+std::vector match(const SymbolIndex &I,
+   const FuzzyFindRequest &Req) {
+  std::vector Matches;
+  auto Ctx = Context::empty();
+  I.fuzzyFind(Ctx, Req,
+  [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+  return Matches;
+}
+
+TEST(MemIndexTest, MemIndexSymbolsRecycled) {
+  MemIndex I;
+  std::weak_ptr Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  // Release old symbols.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+TEST(MemIndexTest, MemIndexMatchSubstring) {
+  MemIndex I;
+  I.build(generateNumSymbols(5, 25));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  MemIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST(MemIndexTest, MemIndexLimitedNumMatches) {
+  MemIndex I;
+  I.build(generateNumSymbols(0, 100));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
Index: clangd/index/MemIndex.h
===
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include 
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`. All symbol p

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-14 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

Thanks for the split & refactoring! This looks nice. Just nits, plus some 
thoughts on the test.




Comment at: clangd/index/Index.cpp:11
 #include "Index.h"
-
+#include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/Support/SHA1.h"

changes in this file can now be reverted I think



Comment at: clangd/index/Index.h:128
+  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
+  /// each matched symbol.
+  ///

nit: "... before returning".

Just to be explicit that this is a synchronous API.



Comment at: clangd/index/Index.h:130
+  ///
+  /// Returns true if all candidates are matched.
+  virtual bool

This is a little vague - Returns true if the result list is complete, false if 
it was truncated due to MaxCandidateCount?

Might be clearer to invert it - Returns true if the result list was truncated - 
up to you.



Comment at: clangd/index/Index.h:132
+  virtual bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+std::function Callback) const = 0;

context patch has landed, so this should now take const Context& as first 
parameter



Comment at: clangd/index/Index.h:136
+  // FIXME: add interfaces for more index use cases:
+  //  - Symbol getSymbolInfo(llvm::StringRef USR);
+  //  - getAllOccurrences(llvm::StringRef USR);

USRs will rather be SymbolIDs



Comment at: clangd/index/MemIndex.cpp:38
+  // Find all symbols that contain the query, igoring cases.
+  // FIXME: use better matching algorithm, e.g. fuzzy matcher.
+  if (StringRef(StringRef(Sym->QualifiedName).lower())

you can easily do this now - it shouldn't even be more lines of code.
Up to you of course.



Comment at: unittests/clangd/IndexTests.cpp:29
+
+struct CountedSymbolSlab {
+  CountedSymbolSlab() = delete;

nit: given the name, I actually think inheriting from Slab might be clearer.

Though I'm not sure we actually need this, see `weak_ptr` part of next comment.



Comment at: unittests/clangd/IndexTests.cpp:41
+
+class MemIndexTest : public ::testing::Test {
+protected:

This is a nice test, the one weakness I see is it's hard to read the cases in 
isolation as there's quite a lot of action-at-a-distance.
I think we can eliminate the base class without adding much boilerplate, which 
would make the tests more self-contained:
  - `Index` can be owned by the test
  - `match()` is good, but moving `Index.build()` call into the test, and 
accepting `Index` as a param means this interaction is more obvious and `match` 
can be a free function.
  - `CountedSymbolSlab` can more directly be replaced by a `weak_ptr`, which 
can tell you whether a `shared_ptr` is alive
  - `generateNumSymbols` (already) doesn't need to be a member. Having it 
optionally emit a weak_ptr to its return value would simplify the tests a 
little.

So MemIndexSymbolsRecycled would look something like

Index I;
std::weak_ptr> Symbols;
I.build(generateNumSymbols(0, 10), &Symbols);
EXPECT_THAT(match(I, Req), ElementsAre("7"));
EXPECT_FALSE(Symbols.expired());
I.build(generateNumSymbols(0,0));
EXPECT_TRUE(Symbols.expired());

WDYT?



Comment at: unittests/clangd/IndexTests.cpp:62
+  Slab->Slab.insert(symbol(std::to_string(i)));
+auto *Symbols = new std::vector();
+

This is a leak - the returned shared_ptr doesn't own this vector.

(I'm actually fine with this for simplicity if it's documented - but we might 
be running leak checkers that complain about it?)

In principle you need to make_shared a struct containing both the vector and 
the slab.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-13 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126775.
ioeric added a comment.

- Remove everything except for SymbolIndex interface and MemIndex
- Added unit tests for MemIndex


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,118 @@
+//===-- IndexTests.cpp  ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+  Symbol Sym;
+  Sym.ID = SymbolID(ID);
+  Sym.QualifiedName = ID;
+  return Sym;
+}
+
+struct CountedSymbolSlab {
+  CountedSymbolSlab() = delete;
+
+  explicit CountedSymbolSlab(int &Counter) : Counter(Counter) { ++Counter; }
+
+  ~CountedSymbolSlab() { Counter--; }
+
+  SymbolSlab Slab;
+  // A counter for the number of living slabs.
+  int &Counter;
+};
+
+class MemIndexTest : public ::testing::Test {
+protected:
+  std::vector
+  match(std::shared_ptr> Symbols,
+const FuzzyFindRequest &Req) {
+Index.build(std::move(Symbols));
+std::vector Matches;
+Index.fuzzyFind(
+Req, [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+return Matches;
+  }
+
+  // Build a CountedSymbolSlab containing symbols with IDs and names [Begin,
+  // End]. The life time of the slab is managed by the returned shared_ptr,
+  // which points to a vector of pointers pointing to all symbols in the slab.
+  std::shared_ptr>
+  generateNumSymbols(int Begin, int End) {
+auto Slab = std::make_shared(SlabCounter);
+
+for (int i = Begin; i <= End; i++)
+  Slab->Slab.insert(symbol(std::to_string(i)));
+auto *Symbols = new std::vector();
+
+for (const auto &Sym : Slab->Slab)
+  Symbols->push_back(&Sym.second);
+return std::shared_ptr>(std::move(Slab),
+Symbols);
+  }
+
+  int SlabCounter = 0;
+  MemIndex Index;
+};
+
+TEST_F(MemIndexTest, MemIndexSymbolsRecycled) {
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  auto Matches = match(generateNumSymbols(0, 10), Req);
+  EXPECT_THAT(Matches, UnorderedElementsAre("7"));
+
+  EXPECT_EQ(SlabCounter, 1);
+  // Release old symbols.
+  Index.build(std::make_shared>());
+  EXPECT_EQ(SlabCounter, 0);
+}
+
+TEST_F(MemIndexTest, MemIndexMatchSubstring) {
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  auto Matches = match(generateNumSymbols(5, 25), Req);
+  EXPECT_THAT(Matches, UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST_F(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  auto Matches = match(std::move(Symbols), Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST_F(MemIndexTest, MemIndexLimitedNumMatches) {
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  auto Matches = match(generateNumSymbols(0, 100), Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
Index: clangd/index/MemIndex.h
===
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include 
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index f

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-13 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

I think this patch is too big, and there are several separate pieces here:

- the index interface, and the implementation of the dynamic symbol index 
(these probably belong together). We're missing unit tests for the 
implementation.
- building and updating the dynamic symbol index in clangd
- code completion based on a symbol index

As a consequence I'm finding it pretty intimidating to review, I've tried to 
leave the main high-level comments.




Comment at: clangd/ClangdIndex.h:24
+
+/// \brief This manages symbols from source files that can be updated or
+/// removed. Symbols are used by a symbol index which builds indexes on symbols

nit: maybe call this out as a container? that tells a lot about what it 
does/doesn't do.

e.g. `\brief A container of Symbols from several source files. It can be 
updated at source-file granularity, replacing all symbols from one file with a 
new set.` ...



Comment at: clangd/ClangdUnit.cpp:854
  NewAST->getDiagnostics().end());
+  if (That->FileSyms) {
+auto Symbols =

Having clangdunit know how the index is maintained feels like a layering 
violation.

Can we instead have clangdunit take a callback 
(`UniqueFunction` or so) that gets invoked after each 
successful build? That can be the "hook" where we do indexing.

The common higher layer (ClangdServer) then just has to have the glue code 
tying the ClangdUnit API to the Index functionality.



Comment at: clangd/ClangdUnitStore.h:29
 public:
+  explicit CppFileCollection(FileSymbols *FileSyms) : FileSyms(FileSyms) {}
+

Similarly here, this can take a post-build callback of void(Filename, 
ParsedAST&) that is translated into per-cppfile callbacks. Then it doesn't need 
to know about FileSymbols.

(Or ParsedAST*, with nullptr meaning this file is gone)



Comment at: clangd/CodeComplete.h:50
 
+  /// Add symbols in namespace context (including global namespace) to code
+  /// completion.

I find the config changes pretty confusing.
CodeComplete options is for options we want to offer users. This change doesn't 
seem motivated by a user requirement, but rather by implementation details.
If we're going to do that, we should be clear about it, rather than just expose 
every option of clang::CodeCompletionOptions we might want to set ourselves

If we do want to ensure that we can do clangd::CodeCompleteOptions -> 
clang::CodeCompleteOptions without extra args:

CodeCompleteOptions {
  ...
  const Index* Index = nullptr; // Populated internally by clangd, do not 
set
}

then we can use the presence of index to determine whether to restrict what we 
ask Sema for.
(We could also let users override the index used for code completion this way 
if we want, but I don't see a use case)



Comment at: clangd/index/FileIndex.h:62
+/// whose symbols can be easily managed in memory.
+class FileSymbolsMemIndex : public SymbolIndex {
+public:

I think it'd be possible and nice to decouple `MemIndex` from FileSymbols, such 
that we can use it for the on-disk index (which is one big slab).

At the moment it seems like an unhappy mix - we don't actually get any 
incrementality, but we're coupled to the incremental storage.

I think it's worth addressing now as MemIndex should be named differently and 
move to another file if this is the case.

One way this could work:
  class MemIndex {
shared_ptr> Symbols;

// Symbols must remain accessible as long as S is kept alive.
build(shared_ptr> S) {
  // TODO: build smarter index structures later
  lock_guard
  Symbols = move(S); // releases old symbols
  // TODO: move smart index structures
}
  }

  class FileSymbols {
// The shared_ptr keeps the symbols alive
shared_ptr> allSymbols() {
  struct Snapshot {
vector Pointers;
vector> KeepAlive;
  };
  auto Snap = make_shared();
  for (Slab in FileToSymbols) {
Snap->KeepAlive.push_back(Slab);
for (Sym in *Slab)
  Snap.Pointers.push_back(&Sym);
  }
  return shared_ptr>(move(Snap), &Snap->Pointers);
}
  }

  // In ClangdServer, when getting new symbols for a file
  FileSymbols.update(Path, Collector.takeSymbols());
  DynamicIndex.build(FileSymbols.allSymbols());

This seems pretty nice, but vector> would work in 
practice for the interface too, if you don't like aliasing shared_ptr tricks.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-13 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126686.
ioeric added a comment.

Merged with origin/master and moved index-related files to index/ subdirectory.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/ClangdServer.cpp
  clangd/ClangdServer.h
  clangd/ClangdUnit.cpp
  clangd/ClangdUnit.h
  clangd/ClangdUnitStore.cpp
  clangd/ClangdUnitStore.h
  clangd/CodeComplete.cpp
  clangd/CodeComplete.h
  clangd/index/FileIndex.cpp
  clangd/index/FileIndex.h
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  clangd/tool/ClangdMain.cpp

Index: clangd/tool/ClangdMain.cpp
===
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -90,6 +90,15 @@
 "Trace internal events and timestamps in chrome://tracing JSON format"),
 llvm::cl::init(""), llvm::cl::Hidden);
 
+static llvm::cl::opt EnableIndexBasedCompletion(
+"enable-index-based-completion",
+llvm::cl::desc(
+"Enable index-based global code completion (experimental). Clangd will "
+"use symbols built from ASTs of opened files and additional indexes "
+"(e.g. offline built codebase-wide symbol table) to complete partial "
+"symbols."),
+llvm::cl::init(false));
+
 int main(int argc, char *argv[]) {
   llvm::cl::ParseCommandLineOptions(argc, argv, "clangd");
 
@@ -170,9 +179,15 @@
   clangd::CodeCompleteOptions CCOpts;
   CCOpts.EnableSnippets = EnableSnippets;
   CCOpts.IncludeIneligibleResults = IncludeIneligibleResults;
+  if (EnableIndexBasedCompletion) {
+// Disable sema code completion for qualified code completion and use global
+// symbol index instead.
+CCOpts.IncludeNamespaceLevelDecls = false;
+  }
   // Initialize and run ClangdLSPServer.
   ClangdLSPServer LSPServer(Out, WorkerThreadsCount, StorePreamblesInMemory,
-CCOpts, ResourceDirRef, CompileCommandsDirPath);
+CCOpts, ResourceDirRef, CompileCommandsDirPath,
+EnableIndexBasedCompletion);
   constexpr int NoShutdownRequestErrorCode = 1;
   llvm::set_thread_name("clangd.main");
   return LSPServer.run(std::cin) ? 0 : NoShutdownRequestErrorCode;
Index: clangd/index/SymbolCollector.h
===
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -24,6 +24,12 @@
 public:
   SymbolCollector() = default;
 
+  void initialize(ASTContext &Ctx) override { ASTCtx = &Ctx; }
+
+  void setPreprocessor(std::shared_ptr PP) override {
+this->PP = std::move(PP);
+  }
+
   bool
   handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
   ArrayRef Relations, FileID FID,
@@ -37,6 +43,9 @@
 private:
   // All Symbols collected from the AST.
   SymbolSlab Symbols;
+
+  ASTContext *ASTCtx;
+  std::shared_ptr PP;
 };
 
 } // namespace clangd
Index: clangd/index/SymbolCollector.cpp
===
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -8,8 +8,8 @@
 //===--===//
 
 #include "SymbolCollector.h"
-
 #include "clang/AST/ASTContext.h"
+#include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/Basic/SourceManager.h"
@@ -57,6 +57,107 @@
   }
   return AbsolutePath.str();
 }
+
+std::string getDocumentation(const CodeCompletionString &CCS) {
+  // Things like __attribute__((nonnull(1,3))) and [[noreturn]]. Present this
+  // information in the documentation field.
+  std::string Result;
+  const unsigned AnnotationCount = CCS.getAnnotationCount();
+  if (AnnotationCount > 0) {
+Result += "Annotation";
+if (AnnotationCount == 1) {
+  Result += ": ";
+} else /* AnnotationCount > 1 */ {
+  Result += "s: ";
+}
+for (unsigned I = 0; I < AnnotationCount; ++I) {
+  Result += CCS.getAnnotation(I);
+  Result.push_back(I == AnnotationCount - 1 ? '\n' : ' ');
+}
+  }
+  // Add brief documentation (if there is any).
+  if (CCS.getBriefComment() != nullptr) {
+if (!Result.empty()) {
+  // This means we previously added annotations. Add an extra newline
+  // character to make the annotations stand out.
+  Result.push_back('\n');
+}
+Result += CCS.getBriefComment();
+  }
+  return Result;
+}
+
+void ProcessChunks(const CodeCompletionString &CCS, Symbol *Sym) {
+  for (const auto &Chunk : CCS) {
+// Informative qualifier chunks only clutter completion results, skip
+// them.
+if (Chunk.Kind == CodeCompletionString::CK_Informative &&
+StringRef(Chunk.Text).endswith("::"))
+  continue;
+
+switch (Chunk.Kind) {
+case CodeCompletionString

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-11 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clangd/ClangdIndex.h:1
+//===--- ClangdIndex.h - Symbol indexes for clangd.---*- 
C++-*-===//
+//

sammccall wrote:
> nit: `Clangd` prefix doesn't do much.
> I'd suggest `Index.h` for the interface (including Symbol), and `MemIndex.h` 
> for the in-memory implementations?
I'll do the renaming when D40897  is landed since it defines Symbol and would 
probably create `Index.h` as well.



Comment at: clangd/ClangdIndex.h:50
+// relatively small symbol table built offline.
+class InMemoryIndexSourcer {
+public:

sammccall wrote:
> As discussed offline - the lifetime of the contained symbols and the 
> operations on the index derived from it need to be carefully considered.
> 
> Involving inheritance here seems likely to make these tricky interactions 
> even trickier.
> 
> Would suggest:
>  - a concrete class (`SymbolCache`? `FileSymbols`?) to hold a collection of 
> symbols from multiple files, with the ability to iterate over them and 
> replace all the symbols from one file at once
>  - a concrete class `MemIndex` that can be constructed from a sequence of 
> symbols and implements `Index`
> 
> You probably want to make them both immutable with snapshot semantics, or 
> have a reader-writer lock that spans both.
The current revision implements a `FileSymbols` with the snapshot semantics. 
Not entirely sure if this is what you are looking for. Let me know...



Comment at: clangd/ClangdIndex.h:72
+   /// collected.
+   void update(PathRef Path, ASTContext &Context,
+   std::shared_ptr PP,

sammccall wrote:
> The AST -> symbol conversion doesn't seem to have much to do with the rest of 
> the class - we can move this to a free function I think, which would give the 
> class a narrower responsibility.
Moved the conversion code to ClangdUnit.cpp



Comment at: clangd/CodeComplete.cpp:378
+// FIXME: output a warning to logger if there are results from sema.
+return qualifiedIdCompletionWithIndex(*Index, S, **OptSS, &Items);
+  }

sammccall wrote:
> I don't like the idea of doing index lookups from the middle of a sema 
> callback!
> 
> Can we just record the CCContext  in the Collector instead, and do this work 
> afterwards?
Good point!


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-11 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 126379.
ioeric marked 8 inline comments as done.
ioeric added a comment.

- Address review comments.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/ClangdIndex.cpp
  clangd/ClangdIndex.h
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/ClangdServer.cpp
  clangd/ClangdServer.h
  clangd/ClangdUnit.cpp
  clangd/ClangdUnit.h
  clangd/ClangdUnitStore.cpp
  clangd/ClangdUnitStore.h
  clangd/CodeComplete.cpp
  clangd/CodeComplete.h
  clangd/Symbol.cpp
  clangd/Symbol.h
  clangd/SymbolCompletionInfo.cpp
  clangd/SymbolCompletionInfo.h
  clangd/SymbolIndex.h
  clangd/tool/ClangdMain.cpp
  unittests/clangd/CMakeLists.txt
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===
--- /dev/null
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -0,0 +1,110 @@
+//===-- SymbolCollectorTests.cpp  ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "Symbol.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include 
+#include 
+
+using testing::ElementsAre;
+using testing::Eq;
+using testing::Field;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+ public:
+  SymbolIndexActionFactory() = default;
+
+  clang::FrontendAction *create() override {
+index::IndexingOptions IndexOpts;
+IndexOpts.SystemSymbolFilter =
+index::IndexingOptions::SystemSymbolFilterKind::All;
+IndexOpts.IndexFunctionLocals = false;
+Collector = std::make_shared();
+FrontendAction *Action =
+index::createIndexingAction(Collector, IndexOpts, nullptr).release();
+return Action;
+  }
+
+  std::shared_ptr Collector;
+};
+
+class SymbolCollectorTest : public ::testing::Test {
+public:
+  bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) {
+llvm::IntrusiveRefCntPtr InMemoryFileSystem(
+new vfs::InMemoryFileSystem);
+llvm::IntrusiveRefCntPtr Files(
+new FileManager(FileSystemOptions(), InMemoryFileSystem));
+
+const std::string FileName = "symbol.cc";
+const std::string HeaderName = "symbols.h";
+auto Factory = llvm::make_unique();
+
+tooling::ToolInvocation Invocation(
+{"symbol_collector", "-fsyntax-only", "-std=c++11", FileName},
+Factory->create(), Files.get(),
+std::make_shared());
+
+InMemoryFileSystem->addFile(HeaderName, 0,
+llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+
+std::string Content = "#include\"" + std::string(HeaderName) + "\"";
+Content += "\n" + MainCode.str();
+InMemoryFileSystem->addFile(FileName, 0,
+llvm::MemoryBuffer::getMemBuffer(Content));
+Invocation.run();
+Symbols = Factory->Collector->getSymbols();
+return true;
+  }
+
+protected:
+  std::set Symbols;
+};
+
+TEST_F(SymbolCollectorTest, CollectSymbol) {
+  const std::string Header = R"(
+class Foo {
+ void f();
+};
+void f1();
+inline void f2() {}
+  )";
+  const std::string Main = R"(
+namespace {
+void ff() {} // ignore
+}
+void f1() {}
+  )";
+  runSymbolCollector(Header, Main);
+  EXPECT_THAT(Symbols,
+  UnorderedElementsAre(Field(&Symbol::QualifiedName, Eq("Foo")),
+   Field(&Symbol::QualifiedName, Eq("Foo::f")),
+   Field(&Symbol::QualifiedName, Eq("f1")),
+   Field(&Symbol::QualifiedName, Eq("f2";
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -15,6 +15,7 @@
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
+  SymbolCollectorTests.cpp
   )
 
 target_link_libraries(ClangdTests
Index: clangd/tool/ClangdMain.cpp
===
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -90,6 +90,15 @@
 "Trace internal events and timestamps in chrome://tracing JSON format"),
 llvm::cl::init("

Re: [PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-08 Thread Marc-André Laperle via cfe-commits
A Hangouts meeting sounds good! Yes, let's arrange via emails.


From: Haojian Wu via Phabricator 
Sent: Friday, December 8, 2017 7:14:42 AM
To: ioe...@google.com; Marc-André Laperle; sammcc...@google.com
Cc: hok...@google.com; kli...@google.com; mgo...@gentoo.org; 
ibiryu...@google.com; cfe-commits@lists.llvm.org
Subject: [PATCH] D40548: [clangd] Symbol index interfaces and index-based code 
completion.

hokein added a comment.

Thanks for the feedback, Marc!

Yeah, I think the ClangdIndexDataSymbol and ClangdIndexDataOccurrence are 
something similar to what https://reviews.llvm.org/D40897 trying to address, 
maybe put the discussion there? Before that, I think having a sym meeting is a 
good idea to keep us in the same page.

In https://reviews.llvm.org/D40548#949279, @ioeric wrote:

> >
>
> I think some of the ideas here could be useful. This patch focuses mostly on 
> index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design 
> of symbol structure. The way symbols are stored and used in this patch is 
> likely to change depending on how https://reviews.llvm.org/D40897 goes.
>
> > The "Clangd" prefix adds a bit much of clutter so maybe it should be 
> > removed.  I think the main points are that having generic 
> > foreachSymbols/foreachOccurrence with callbacks is well suited to implement 
> > multiple features with minimal copying.
>
> Although I'm not sure if `foreachSymbols`/... would be feasible for all 
> indexes yet, we do plan to switch to callback-based index interfaces, which 
> Sam also proposed in the review comments.
>
> There have been some offline discussions happening around clangd's indexing, 
> and sorry that we have not been able to keep you up to date. I think it might 
> be more efficient if we could meet via VC/Hangouts and sync on our designs. 
> If you don't mind a meeting, I am happy to arrange it via emails.





Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-08 Thread Haojian Wu via Phabricator via cfe-commits
hokein added a comment.

Thanks for the feedback, Marc!

Yeah, I think the ClangdIndexDataSymbol and ClangdIndexDataOccurrence are 
something similar to what https://reviews.llvm.org/D40897 trying to address, 
maybe put the discussion there? Before that, I think having a sym meeting is a 
good idea to keep us in the same page.

In https://reviews.llvm.org/D40548#949279, @ioeric wrote:

> >   
>
> I think some of the ideas here could be useful. This patch focuses mostly on 
> index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design 
> of symbol structure. The way symbols are stored and used in this patch is 
> likely to change depending on how https://reviews.llvm.org/D40897 goes.
>
> > The "Clangd" prefix adds a bit much of clutter so maybe it should be 
> > removed.  I think the main points are that having generic 
> > foreachSymbols/foreachOccurrence with callbacks is well suited to implement 
> > multiple features with minimal copying.
>
> Although I'm not sure if `foreachSymbols`/... would be feasible for all 
> indexes yet, we do plan to switch to callback-based index interfaces, which 
> Sam also proposed in the review comments.
>
> There have been some offline discussions happening around clangd's indexing, 
> and sorry that we have not been able to keep you up to date. I think it might 
> be more efficient if we could meet via VC/Hangouts and sync on our designs. 
> If you don't mind a meeting, I am happy to arrange it via emails.





Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-08 Thread Eric Liu via Phabricator via cfe-commits
ioeric added a comment.

In https://reviews.llvm.org/D40548#949182, @malaperle wrote:

> As a follow-up, here's the interface for querying the index that I am using 
> right now. It's meant to be able to retrieve from any kind of "backend", i.e. 
> in-memory, ClangdIndexDataStore, libIndexStore, etc. I was able to implement 
> "Open Workspace Symbol" (which is close to code completion in concept), Find 
> References and Find Definitions.
>
>   using USR = llvm::SmallString<256>;
>  
>   class ClangdIndexDataOccurrence;
>  
>   class ClangdIndexDataSymbol {
>   public:
> virtual index::SymbolKind getKind() = 0;
> /// For example, for mynamespace::myclass::mymethod, this will be
> /// mymethod.
> virtual std::string getName() = 0;
> /// For example, for mynamespace::myclass::mymethod, this will be
> /// mynamespace::myclass::
> virtual std::string getQualifier() = 0;
> virtual std::string getUsr() = 0;
>  
> virtual void foreachOccurrence(index::SymbolRoleSet Roles, 
> llvm::function_ref Receiver) = 0;
>  
> virtual ~ClangdIndexDataSymbol() = default;
>   };
>  
>   class ClangdIndexDataOccurrence {
>   public:
> enum class OccurrenceType : uint16_t {
>OCCURRENCE,
>DEFINITION_OCCURRENCE
>  };
>  
> virtual OccurrenceType getKind() const = 0;
> virtual std::string getPath() = 0;
> /// Get the start offset of the symbol occurrence. The SourceManager can 
> be
> /// used for implementations that need to convert from a line/column
> /// representation to an offset.
> virtual uint32_t getStartOffset(SourceManager &SM) = 0;
> /// Get the end offset of the symbol occurrence. The SourceManager can be
> /// used for implementations that need to convert from a line/column
> /// representation to an offset.
> virtual uint32_t getEndOffset(SourceManager &SM) = 0;
> virtual ~ClangdIndexDataOccurrence() = default;
> //TODO: Add relations
>  
> static bool classof(const ClangdIndexDataOccurrence *O) { return 
> O->getKind() == OccurrenceType::OCCURRENCE; }
>   };
>  
>   /// An occurrence that also has definition with a body that requires 
> additional
>   /// locations to keep track of the beginning and end of the body.
>   class ClangdIndexDataDefinitionOccurrence : public 
> ClangdIndexDataOccurrence {
>   public:
> virtual uint32_t getDefStartOffset(SourceManager &SM) = 0;
> virtual uint32_t getDefEndOffset(SourceManager &SM) = 0;
>  
> static bool classof(const ClangdIndexDataOccurrence *O) { return 
> O->getKind() == OccurrenceType::DEFINITION_OCCURRENCE; }
>   };
>  
>   class ClangdIndexDataProvider {
>   public:
>  
> virtual void foreachSymbols(StringRef Pattern, 
> llvm::function_ref Receiver) = 0;
> virtual void foreachSymbols(const USR &Usr, 
> llvm::function_ref Receiver) = 0;
>  
> virtual ~ClangdIndexDataProvider() = default;
>   };
>


I think some of the ideas here could be useful. This patch focuses mostly on 
index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design 
of symbol structure. The way symbols are stored and used in this patch is 
likely to change depending on how https://reviews.llvm.org/D40897 goes.

> The "Clangd" prefix adds a bit much of clutter so maybe it should be removed. 
>  I think the main points are that having generic 
> foreachSymbols/foreachOccurrence with callbacks is well suited to implement 
> multiple features with minimal copying.

Although I'm not sure if `foreachSymbols`/... would be feasible for all indexes 
yet, we do plan to switch to callback-based index interfaces, which Sam also 
proposed in the review comments.

There have been some offline discussions happening around clangd's indexing, 
and sorry that we have not been able to keep you up to date. I think it might 
be more efficient if we could meet via VC/Hangouts and sync on our designs. If 
you don't mind a meeting, I am happy to arrange it via emails.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-07 Thread Marc-Andre Laperle via Phabricator via cfe-commits
malaperle added a comment.

As a follow-up, here's the interface for querying the index that I am using 
right now. It's meant to be able to retrieve from any kind of "backend", i.e. 
in-memory, ClangdIndexDataStore, libIndexStore, etc. I was able to implement 
"Open Workspace Symbol" (which is close to code completion in concept), Find 
References and Find Definitions.

  using USR = llvm::SmallString<256>;
  
  class ClangdIndexDataOccurrence;
  
  class ClangdIndexDataSymbol {
  public:
virtual index::SymbolKind getKind() = 0;
/// For example, for mynamespace::myclass::mymethod, this will be
/// mymethod.
virtual std::string getName() = 0;
/// For example, for mynamespace::myclass::mymethod, this will be
/// mynamespace::myclass::
virtual std::string getQualifier() = 0;
virtual std::string getUsr() = 0;
  
virtual void foreachOccurrence(index::SymbolRoleSet Roles, 
llvm::function_ref Receiver) = 0;
  
virtual ~ClangdIndexDataSymbol() = default;
  };
  
  class ClangdIndexDataOccurrence {
  public:
enum class OccurrenceType : uint16_t {
   OCCURRENCE,
   DEFINITION_OCCURRENCE
 };
  
virtual OccurrenceType getKind() const = 0;
virtual std::string getPath() = 0;
/// Get the start offset of the symbol occurrence. The SourceManager can be
/// used for implementations that need to convert from a line/column
/// representation to an offset.
virtual uint32_t getStartOffset(SourceManager &SM) = 0;
/// Get the end offset of the symbol occurrence. The SourceManager can be
/// used for implementations that need to convert from a line/column
/// representation to an offset.
virtual uint32_t getEndOffset(SourceManager &SM) = 0;
virtual ~ClangdIndexDataOccurrence() = default;
//TODO: Add relations
  
static bool classof(const ClangdIndexDataOccurrence *O) { return 
O->getKind() == OccurrenceType::OCCURRENCE; }
  };
  
  /// An occurrence that also has definition with a body that requires 
additional
  /// locations to keep track of the beginning and end of the body.
  class ClangdIndexDataDefinitionOccurrence : public ClangdIndexDataOccurrence {
  public:
virtual uint32_t getDefStartOffset(SourceManager &SM) = 0;
virtual uint32_t getDefEndOffset(SourceManager &SM) = 0;
  
static bool classof(const ClangdIndexDataOccurrence *O) { return 
O->getKind() == OccurrenceType::DEFINITION_OCCURRENCE; }
  };
  
  class ClangdIndexDataProvider {
  public:
  
virtual void foreachSymbols(StringRef Pattern, 
llvm::function_ref Receiver) = 0;
virtual void foreachSymbols(const USR &Usr, 
llvm::function_ref Receiver) = 0;
  
virtual ~ClangdIndexDataProvider() = default;
  };

The "Clangd" prefix adds a bit much of clutter so maybe it should be removed.  
I think the main points are that having generic 
foreachSymbols/foreachOccurrence with callbacks is well suited to implement 
multiple features with minimal copying.




Comment at: clangd/SymbolIndex.h:50
+  virtual llvm::Expected
+  complete(const CompletionRequest &Req) const = 0;
+

sammccall wrote:
> This is finding symbols that fuzzy-match a string, right?
> We shouldn't conflate this with code completion - code completion is 
> context-sensitive, and this query operation will be used for other operations 
> like navigate-to-symbol.
> 
> Suggest `fuzzyFind` or similar.
> (Similarly with CompletionRequest, CompletionSymbol)
Similar to completion is "Open Workspace Symbol". So a more generic query could 
be useful.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-07 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clangd/ClangdIndex.h:24
+/// (re-)scoring symbols from different indexes.
+class CombinedSymbolIndex : public SymbolIndex {
+public:

This seems a little premature to me - it's hard to know if this idea makes 
sense without actually having multiple implementations to mix together. My gut 
feeling is that sorting by (index weight, symbol score) puts too much emphasis 
on the weight.

Can we just have a single optional index for now?



Comment at: clangd/ClangdIndex.h:36
+
+  void addSymbolIndex(llvm::StringRef Label, WeightedIndex Index);
+

If you do keep this class, move the label inside the struct? Avoids a lot of 
std::pair



Comment at: clangd/ClangdIndex.h:50
+// relatively small symbol table built offline.
+class InMemoryIndexSourcer {
+public:

As discussed offline - the lifetime of the contained symbols and the operations 
on the index derived from it need to be carefully considered.

Involving inheritance here seems likely to make these tricky interactions even 
trickier.

Would suggest:
 - a concrete class (`SymbolCache`? `FileSymbols`?) to hold a collection of 
symbols from multiple files, with the ability to iterate over them and replace 
all the symbols from one file at once
 - a concrete class `MemIndex` that can be constructed from a sequence of 
symbols and implements `Index`

You probably want to make them both immutable with snapshot semantics, or have 
a reader-writer lock that spans both.



Comment at: clangd/ClangdIndex.h:72
+   /// collected.
+   void update(PathRef Path, ASTContext &Context,
+   std::shared_ptr PP,

The AST -> symbol conversion doesn't seem to have much to do with the rest of 
the class - we can move this to a free function I think, which would give the 
class a narrower responsibility.



Comment at: clangd/ClangdIndex.h:1
+//===--- ClangdIndex.h - Symbol indexes for clangd.---*- 
C++-*-===//
+//

nit: `Clangd` prefix doesn't do much.
I'd suggest `Index.h` for the interface (including Symbol), and `MemIndex.h` 
for the in-memory implementations?



Comment at: clangd/ClangdLSPServer.h:37
+  llvm::Optional CompileCommandsDir,
+  std::vector<
+  std::pair>

ilya-biryukov wrote:
> Maybe pass a parameter of type `SymbolIndex&` instead of a vector, which is 
> used to create `CombinedSymbolIndex` later?
> It seems that `ClangdServer` does not really care about the specific index 
> implementation that's used (nor should it!)
> 
> We could have helper methods to conveniently create `CombinedSymbolIndex` 
> from a list of indexes, or even create the default index for clangd. 
I think the issue is that ClangdServer is going to create and maintain the 
dynamic index, which should then be merged as a peer with a set of other 
indexes.

Can we just pass in a single SymbolIndex* StaticIdx, and hard-code the 
assumption about static index + dynamic index being what we're using? That way 
we can also hard-code the heuristics for merging them together, instead of 
data-driving everything.



Comment at: clangd/ClangdLSPServer.h:38
+  llvm::Optional CompileCommandsDir,
+  bool EnableIndexBasedCodeCompletion,
+  std::vector<

this is kind of two options in one - "should we build the index" and "should we 
query the index".

That seems OK, but I'd consider renaming this to BuildDynamicSymbolIndex or 
something to make it clear that building is optional, querying it isn't 
provided as an option



Comment at: clangd/CodeComplete.cpp:378
+// FIXME: output a warning to logger if there are results from sema.
+return qualifiedIdCompletionWithIndex(*Index, S, **OptSS, &Items);
+  }

I don't like the idea of doing index lookups from the middle of a sema callback!

Can we just record the CCContext  in the Collector instead, and do this work 
afterwards?



Comment at: clangd/CodeComplete.h:71
 /// Get code completions at a specified \p Pos in \p FileName.
+/// If `Index` is not nullptr, this will use the index for global code
+/// completion; otherwise, use sema code completion by default.

I'm not sure you want to spell out this behavior - merging vs replacing is more 
of an implementation detail



Comment at: clangd/SymbolIndex.h:45
+
+class SymbolIndex {
+public:

Interfaces need doc :-)



Comment at: clangd/SymbolIndex.h:49
+
+  virtual llvm::Expected
+  complete(const CompletionRequest &Req) const = 0;

As discussed, we may want a callback-based API that makes it easier for an 
implementation to manage the lifetime of symbols without copying them.

Also is there anything to do with errors other than log and treat as no

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-07 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 125962.
ioeric added a comment.

Diff on https://reviews.llvm.org/D40897 instead origin/master!


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/ClangdIndex.cpp
  clangd/ClangdIndex.h
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/ClangdServer.cpp
  clangd/ClangdServer.h
  clangd/ClangdUnit.cpp
  clangd/ClangdUnit.h
  clangd/ClangdUnitStore.cpp
  clangd/ClangdUnitStore.h
  clangd/CodeComplete.cpp
  clangd/CodeComplete.h
  clangd/Symbol.cpp
  clangd/Symbol.h
  clangd/SymbolCompletionInfo.cpp
  clangd/SymbolCompletionInfo.h
  clangd/SymbolIndex.h
  clangd/tool/ClangdMain.cpp
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -36,15 +36,15 @@
  public:
   SymbolIndexActionFactory() = default;
 
-  clang::ASTFrontendAction *create() override {
+  clang::FrontendAction *create() override {
 index::IndexingOptions IndexOpts;
 IndexOpts.SystemSymbolFilter =
 index::IndexingOptions::SystemSymbolFilterKind::All;
 IndexOpts.IndexFunctionLocals = false;
 Collector = std::make_shared();
 FrontendAction *Action =
 index::createIndexingAction(Collector, IndexOpts, nullptr).release();
-return llvm::cast(Action);
+return Action;
   }
 
   std::shared_ptr Collector;
Index: clangd/tool/ClangdMain.cpp
===
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -90,6 +90,15 @@
 "Trace internal events and timestamps in chrome://tracing JSON format"),
 llvm::cl::init(""), llvm::cl::Hidden);
 
+static llvm::cl::opt EnableIndexBasedCompletion(
+"enable-index-based-completion",
+llvm::cl::desc(
+"Enable index-based global code completion (experimental). Clangd will "
+"use symbols built from ASTs of opened files and additional indexes "
+"(e.g. offline built codebase-wide symbol table) to complete partial "
+"symbols."),
+llvm::cl::init(false));
+
 int main(int argc, char *argv[]) {
   llvm::cl::ParseCommandLineOptions(argc, argv, "clangd");
 
@@ -170,10 +179,15 @@
   clangd::CodeCompleteOptions CCOpts;
   CCOpts.EnableSnippets = EnableSnippets;
   CCOpts.IncludeIneligibleResults = IncludeIneligibleResults;
+  if (EnableIndexBasedCompletion) {
+// Disable sema code completion for qualified code completion and use global
+// symbol index instead.
+CCOpts.IncludeNamespaceLevelDecls = false;
+  }
   // Initialize and run ClangdLSPServer.
   ClangdLSPServer LSPServer(Out, WorkerThreadsCount, StorePreamblesInMemory,
-CCOpts, ResourceDirRef,
-CompileCommandsDirPath);
+CCOpts, ResourceDirRef, CompileCommandsDirPath,
+EnableIndexBasedCompletion, {});
   constexpr int NoShutdownRequestErrorCode = 1;
   llvm::set_thread_name("clangd.main");
   return LSPServer.run(std::cin) ? 0 : NoShutdownRequestErrorCode;
Index: clangd/SymbolIndex.h
===
--- /dev/null
+++ clangd/SymbolIndex.h
@@ -0,0 +1,60 @@
+//===--- CompletionIndex.h - Index for code completion ---*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H
+
+#include "SymbolCompletionInfo.h"
+#include "llvm/Support/Error.h"
+
+namespace clang {
+namespace clangd {
+
+struct CompletionRequest {
+  std::string Query;
+  size_t MaxCandidateCount = UINT_MAX;
+};
+
+/// \brief Signals for scoring completion candidates.
+struct ScoreSignals {
+  // FIXME: add score signals like cross-reference count.
+};
+
+struct CompletionSymbol {
+  ScoreSignals Signals;
+  float SymbolScore;
+
+  std::string USR;
+  index::SymbolKind Kind;
+  std::string QualifiedName;
+
+  SymbolCompletionInfo CompletionInfo;
+};
+
+struct CompletionResult {
+  std::vector Symbols;
+  bool AllMatched;
+};
+
+class SymbolIndex {
+public:
+  virtual ~SymbolIndex() = default;
+
+  virtual llvm::Expected
+  complete(const CompletionRequest &Req) const = 0;
+
+  // FIXME: add interfaces for more index use cases:
+  //  - Symbol getSymbolInfo(llvm::StringRef USR);
+  //  - getAllOccurrences(llvm::StringRef USR);
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H
Index: clangd/SymbolCompletionInfo.h
===

[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.

2017-12-07 Thread Eric Liu via Phabricator via cfe-commits
ioeric updated this revision to Diff 125960.
ioeric added a comment.

- Use IncludeNamespaceLevelDecls option; fix some broken tests.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/ClangdIndex.cpp
  clangd/ClangdIndex.h
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/ClangdServer.cpp
  clangd/ClangdServer.h
  clangd/ClangdUnit.cpp
  clangd/ClangdUnit.h
  clangd/ClangdUnitStore.cpp
  clangd/ClangdUnitStore.h
  clangd/CodeComplete.cpp
  clangd/CodeComplete.h
  clangd/Symbol.cpp
  clangd/Symbol.h
  clangd/SymbolCompletionInfo.cpp
  clangd/SymbolCompletionInfo.h
  clangd/SymbolIndex.h
  clangd/tool/ClangdMain.cpp
  unittests/clangd/CMakeLists.txt
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===
--- /dev/null
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -0,0 +1,110 @@
+//===-- SymbolCollectorTests.cpp  ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+
+#include "Symbol.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include 
+#include 
+
+using testing::ElementsAre;
+using testing::Eq;
+using testing::Field;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+ public:
+  SymbolIndexActionFactory() = default;
+
+  clang::FrontendAction *create() override {
+index::IndexingOptions IndexOpts;
+IndexOpts.SystemSymbolFilter =
+index::IndexingOptions::SystemSymbolFilterKind::All;
+IndexOpts.IndexFunctionLocals = false;
+Collector = std::make_shared();
+FrontendAction *Action =
+index::createIndexingAction(Collector, IndexOpts, nullptr).release();
+return Action;
+  }
+
+  std::shared_ptr Collector;
+};
+
+class SymbolCollectorTest : public ::testing::Test {
+public:
+  bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) {
+llvm::IntrusiveRefCntPtr InMemoryFileSystem(
+new vfs::InMemoryFileSystem);
+llvm::IntrusiveRefCntPtr Files(
+new FileManager(FileSystemOptions(), InMemoryFileSystem));
+
+const std::string FileName = "symbol.cc";
+const std::string HeaderName = "symbols.h";
+auto Factory = llvm::make_unique();
+
+tooling::ToolInvocation Invocation(
+{"symbol_collector", "-fsyntax-only", "-std=c++11", FileName},
+Factory->create(), Files.get(),
+std::make_shared());
+
+InMemoryFileSystem->addFile(HeaderName, 0,
+llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+
+std::string Content = "#include\"" + std::string(HeaderName) + "\"";
+Content += "\n" + MainCode.str();
+InMemoryFileSystem->addFile(FileName, 0,
+llvm::MemoryBuffer::getMemBuffer(Content));
+Invocation.run();
+Symbols = Factory->Collector->getSymbols();
+return true;
+  }
+
+protected:
+  std::set Symbols;
+};
+
+TEST_F(SymbolCollectorTest, CollectSymbol) {
+  const std::string Header = R"(
+class Foo {
+ void f();
+};
+void f1();
+inline void f2() {}
+  )";
+  const std::string Main = R"(
+namespace {
+void ff() {} // ignore
+}
+void f1() {}
+  )";
+  runSymbolCollector(Header, Main);
+  EXPECT_THAT(Symbols,
+  UnorderedElementsAre(Field(&Symbol::QualifiedName, Eq("Foo")),
+   Field(&Symbol::QualifiedName, Eq("Foo::f")),
+   Field(&Symbol::QualifiedName, Eq("f1")),
+   Field(&Symbol::QualifiedName, Eq("f2";
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -15,6 +15,7 @@
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
+  SymbolCollectorTests.cpp
   )
 
 target_link_libraries(ClangdTests
Index: clangd/tool/ClangdMain.cpp
===
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -90,6 +90,15 @@
 "Trace internal events and timestamps in chrome://tracing JSON format"),
 llvm::cl::init(""),