kbobyrev created this revision.
kbobyrev added reviewers: ioeric, ilya-biryukov.
kbobyrev added a project: clang-tools-extra.
Herald added subscribers: arphaman, mgrang, jkorous, MaskRay, mgorny.

This patch is a proof-of-concept Dex index implementation. It has several 
flaws, which don't allow replacing static MemIndex yet, such as:

- Not being able to handle queries of small size (less than 3 symbols); a way 
to solve this is generating trigrams of smaller size and having such incomplete 
trigrams in the index structure.
- Speed measurements: while manually editing files in Vim and requesting 
autocompletion gives an impression that the performance is at least comparable 
with the current static index, having actual numbers is important because we 
don't want to hurt the users and roll out slow code. Eric (@ioeric) suggested 
that we should only replace MemIndex as soon as we have the evidence that this 
is not a regression in terms of performance. An approach which is likely to be 
successful here is to wait until we have benchmark library in the LLVM core 
repository, which is something I have suggested in the LLVM mailing lists, 
received positive feedback on and started working on. I will add a dependency 
as soon as the suggested patch is out for a review (currently there's at least 
one complication which is being addressed by 
https://github.com/google/benchmark/pull/649). Key performance improvements for 
iterators are sorting by cost and the limit iterator.
- Quality measurements: currently, boosting iterator and two-phase lookup stage 
are not implemented, without these the quality is likely to be worse than the 
current implementation can yield. Measuring quality is tricky, but another 
suggestion in the offline discussion was that the drop-in replacement should 
only happen after Boosting iterators implementation (and subsequent query 
enhancement).

The proposed changes do not affect Clangd functionality or performance, 
`DexIndex` is only used in unit tests and not in production code.


https://reviews.llvm.org/D50337

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/MemIndex.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/unittests/clangd/CMakeLists.txt
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.h
  clang-tools-extra/unittests/clangd/IndexTests.cpp

Index: clang-tools-extra/unittests/clangd/IndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/IndexTests.cpp
+++ clang-tools-extra/unittests/clangd/IndexTests.cpp
@@ -7,33 +7,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-using testing::UnorderedElementsAre;
 using testing::Pointee;
+using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 namespace {
 
-Symbol symbol(llvm::StringRef QName) {
-  Symbol Sym;
-  Sym.ID = SymbolID(QName.str());
-  size_t Pos = QName.rfind("::");
-  if (Pos == llvm::StringRef::npos) {
-    Sym.Name = QName;
-    Sym.Scope = "";
-  } else {
-    Sym.Name = QName.substr(Pos + 2);
-    Sym.Scope = QName.substr(0, Pos + 2);
-  }
-  return Sym;
-}
-
 MATCHER_P(Named, N, "") { return arg.Name == N; }
 
 TEST(SymbolSlab, FindAndIterate) {
@@ -52,59 +39,6 @@
     EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
 }
 
-struct SlabAndPointers {
-  SymbolSlab Slab;
-  std::vector<const Symbol *> Pointers;
-};
-
-// Create a slab of symbols with the given qualified names as both IDs and
-// names. The life time of the slab is managed by the returned shared pointer.
-// If \p WeakSymbols is provided, it will be pointed to the managed object in
-// the returned shared pointer.
-std::shared_ptr<std::vector<const Symbol *>>
-generateSymbols(std::vector<std::string> QualifiedNames,
-                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  SymbolSlab::Builder Slab;
-  for (llvm::StringRef QName : QualifiedNames)
-    Slab.insert(symbol(QName));
-
-  auto Storage = std::make_shared<SlabAndPointers>();
-  Storage->Slab = std::move(Slab).build();
-  for (const auto &Sym : Storage->Slab)
-    Storage->Pointers.push_back(&Sym);
-  if (WeakSymbols)
-    *WeakSymbols = Storage;
-  auto *Pointers = &Storage->Pointers;
-  return {std::move(Storage), Pointers};
-}
-
-// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
-// to the `generateSymbols` above.
-std::shared_ptr<std::vector<const Symbol *>>
-generateNumSymbols(int Begin, int End,
-                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  std::vector<std::string> Names;
-  for (int i = Begin; i <= End; i++)
-    Names.push_back(std::to_string(i));
-  return generateSymbols(Names, WeakSymbols);
-}
-
-std::string getQualifiedName(const Symbol &Sym) {
-  return (Sym.Scope + Sym.Name).str();
-}
-
-std::vector<std::string> match(const SymbolIndex &I,
-                               const FuzzyFindRequest &Req,
-                               bool *Incomplete = nullptr) {
-  std::vector<std::string> Matches;
-  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
-    Matches.push_back(getQualifiedName(Sym));
-  });
-  if (Incomplete)
-    *Incomplete = IsIncomplete;
-  return Matches;
-}
-
 TEST(MemIndexTest, MemIndexSymbolsRecycled) {
   MemIndex I;
   std::weak_ptr<SlabAndPointers> Symbols;
@@ -212,18 +146,6 @@
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
-// Returns qualified names of symbols with any of IDs in the index.
-std::vector<std::string> lookup(const SymbolIndex &I,
-                                llvm::ArrayRef<SymbolID> IDs) {
-  LookupRequest Req;
-  Req.IDs.insert(IDs.begin(), IDs.end());
-  std::vector<std::string> Results;
-  I.lookup(Req, [&](const Symbol &Sym) {
-    Results.push_back(getQualifiedName(Sym));
-  });
-  return Results;
-}
-
 TEST(MemIndexTest, Lookup) {
   MemIndex I;
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
@@ -269,7 +191,7 @@
 TEST(MergeTest, Merge) {
   Symbol L, R;
   L.ID = R.ID = SymbolID("hello");
-  L.Name = R.Name = "Foo";                    // same in both
+  L.Name = R.Name = "Foo";                           // same in both
   L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs
   R.CanonicalDeclaration.FileURI = "file:///right.h";
   L.References = 1;
Index: clang-tools-extra/unittests/clangd/IndexHelpers.h
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.h
@@ -0,0 +1,57 @@
+//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName);
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector<const Symbol *> Pointers;
+};
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+std::string getQualifiedName(const Symbol &Sym);
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req,
+                               bool *Incomplete = nullptr);
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/unittests/clangd/IndexHelpers.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.cpp
@@ -0,0 +1,89 @@
+//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexHelpers.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName) {
+  Symbol Sym;
+  Sym.ID = SymbolID(QName.str());
+  size_t Pos = QName.rfind("::");
+  if (Pos == llvm::StringRef::npos) {
+    Sym.Name = QName;
+    Sym.Scope = "";
+  } else {
+    Sym.Name = QName.substr(Pos + 2);
+    Sym.Scope = QName.substr(0, Pos + 2);
+  }
+  return Sym;
+}
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  SymbolSlab::Builder Slab;
+  for (llvm::StringRef QName : QualifiedNames)
+    Slab.insert(symbol(QName));
+
+  auto Storage = std::make_shared<SlabAndPointers>();
+  Storage->Slab = std::move(Slab).build();
+  for (const auto &Sym : Storage->Slab)
+    Storage->Pointers.push_back(&Sym);
+  if (WeakSymbols)
+    *WeakSymbols = Storage;
+  auto *Pointers = &Storage->Pointers;
+  return {std::move(Storage), Pointers};
+}
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  std::vector<std::string> Names;
+  for (int i = Begin; i <= End; i++)
+    Names.push_back(std::to_string(i));
+  return generateSymbols(Names, WeakSymbols);
+}
+
+std::string getQualifiedName(const Symbol &Sym) {
+  return (Sym.Scope + Sym.Name).str();
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req, bool *Incomplete) {
+  std::vector<std::string> Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+    Matches.push_back(clang::clangd::getQualifiedName(Sym));
+  });
+  if (Incomplete)
+    *Incomplete = IsIncomplete;
+  return Matches;
+}
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs) {
+  LookupRequest Req;
+  Req.IDs.insert(IDs.begin(), IDs.end());
+  std::vector<std::string> Results;
+  I.lookup(Req, [&](const Symbol &Sym) {
+    Results.push_back(getQualifiedName(Sym));
+  });
+  return Results;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
 #include "index/dex/Iterator.h"
 #include "index/dex/Token.h"
 #include "index/dex/Trigram.h"
@@ -17,11 +21,13 @@
 #include <string>
 #include <vector>
 
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+
 namespace clang {
 namespace clangd {
 namespace dex {
-
-using ::testing::ElementsAre;
+namespace {
 
 TEST(DexIndexIterators, DocumentIterator) {
   const PostingList L = {4, 7, 8, 20, 42, 100};
@@ -312,6 +318,69 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexIndex, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Use 3+ symbols long names so that trigram index is used.
+TEST(MergeDexIndex, Lookup) {
+  DexIndex I, J;
+  I.build(generateSymbols({"ns::A", "ns::B"}));
+  J.build(generateSymbols({"ns::B", "ns::C"}));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")),
+              UnorderedElementsAre("ns::A"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")),
+              UnorderedElementsAre("ns::B"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")),
+              UnorderedElementsAre("ns::C"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}),
+      UnorderedElementsAre("ns::A", "ns::B"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}),
+      UnorderedElementsAre("ns::A", "ns::C"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")),
+              UnorderedElementsAre());
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Add more tests on DexIndex? Right now, it's mostly a wrapper
+// around MemIndex, adopting tests from IndexTests.cpp sounds reasonable.
+// However, most of these tests use short names (<3 symbols) for symbol lookups,
+// which currently are dispatched to MemIndex and hence just duplicating these
+// tests while substituting MemIndex with DexIndex is not a viable solution.
+TEST(DexIndex, FuzzyFind) {
+  DexIndex Index;
+  Index.build(generateSymbols(
+      {"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC"}));
+  FuzzyFindRequest Req;
+  Req.Query = "ABC";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
+  Req.Scopes = {"ns::", "ns::nested::"};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+}
+
+TEST(DexIndexTest, FuzzyMatch) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+} // namespace
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/unittests/clangd/CMakeLists.txt
@@ -23,6 +23,7 @@
   FuzzyMatchTests.cpp
   GlobalCompilationDatabaseTests.cpp
   HeadersTests.cpp
+  IndexHelpers.cpp
   IndexTests.cpp
   QualityTests.cpp
   SourceCodeTests.cpp
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -22,9 +22,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 
+#include "../Index.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <string>
 #include <vector>
 
@@ -81,6 +81,8 @@
   }
 };
 
+std::vector<Token> generateSearchTokens(const Symbol &Sym);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/Token.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/Token.cpp
@@ -0,0 +1,25 @@
+//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+#include "Trigram.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+  std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+  Result.push_back(Token(Token::Kind::Scope, Sym.Scope));
+  return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -0,0 +1,53 @@
+//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+
+#include "../Index.h"
+#include "../MemIndex.h"
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex index implementation.
+class DexIndex : public SymbolIndex {
+public:
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  // FIXME(kbobyrev): This is the same as in MemIndex. Should I abstract this
+  // out?
+  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
+
+  bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+            llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  // FIXME(kbobyrev): Symbol lookup is also exactly the same as in MemIndex.
+  virtual void
+  lookup(const LookupRequest &Req,
+         llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+private:
+  std::shared_ptr<std::vector<const Symbol *>> Symbols;
+  llvm::DenseMap<SymbolID, const Symbol *> Index;
+  mutable std::mutex Mutex;
+  llvm::DenseMap<Token, PostingList> InvertedIndex;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -0,0 +1,133 @@
+//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DexIndex.h"
+#include "../../FuzzyMatch.h"
+#include <algorithm>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+  for (const Symbol *Sym : *Syms)
+    TempIndex[Sym->ID] = Sym;
+
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    Index = std::move(TempIndex);
+    Symbols = std::move(Syms);
+    std::sort(begin(*Symbols), end(*Symbols),
+              [](const Symbol *LHS, const Symbol *RHS) {
+                return quality(*LHS) > quality(*RHS);
+              });
+    InvertedIndex.clear();
+    for (DocID SymbolRank = 0; SymbolRank < Symbols->size(); ++SymbolRank) {
+      const auto Sym = (*Symbols)[SymbolRank];
+      for (const auto &Token : generateSearchTokens(*Sym)) {
+        if (!InvertedIndex.count(Token))
+          InvertedIndex[Token] = {SymbolRank};
+        else
+          InvertedIndex[Token].push_back(SymbolRank);
+      }
+    }
+  }
+}
+
+bool DexIndex::fuzzyFind(
+    const FuzzyFindRequest &Req,
+    llvm::function_ref<void(const Symbol &)> Callback) const {
+  assert(!StringRef(Req.Query).contains("::") &&
+         "There must be no :: in query.");
+  // FIXME(kbobyrev): Currently, only "long" (3+ symbols) requests are
+  // processed. After tokens are generated for the short queries and index
+  // symbols, this shouldn't be the case.
+  assert(Req.Query.size() >= 3 &&
+         "DexIndex is currently not capable of processing queries of size <3.");
+
+  FuzzyMatcher Filter(Req.Query);
+  bool More;
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+    std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+    for (const auto &Trigram : TrigramTokens) {
+      const auto It = InvertedIndex.find(Trigram);
+      if (It != InvertedIndex.end())
+        TrigramIterators.push_back(create(It->second));
+    }
+
+    std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+    TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+    std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+    for (const auto &Scope : Req.Scopes) {
+      const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+      if (It != InvertedIndex.end())
+        ScopeIterators.push_back(create(It->second));
+    }
+    if (ScopeIterators.size())
+      TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+    auto QueryIterator = createAnd(move(TopLevelChildren));
+    // FIXME(kbobyrev): Limit the total number of consumed symbols using
+    // Req.MaxCandidateCount. That would require implementing Limit iterator.
+    auto SymbolDocIDs = consume(*QueryIterator);
+
+    More = SymbolDocIDs.size() > Req.MaxCandidateCount;
+    std::vector<std::pair<float, const Symbol *>> Scores(SymbolDocIDs.size());
+    for (size_t SymbolIdx = 0; SymbolIdx < SymbolDocIDs.size(); ++SymbolIdx) {
+      const auto Sym = (*Symbols)[SymbolDocIDs[SymbolIdx]];
+      const auto Score = Filter.match(Sym->Name);
+      assert(Score.hasValue() && "Matched symbol has all Fuzzy Matching "
+                                 "trigrams, it should match the query");
+      Scores[SymbolIdx] = std::make_pair(-*Score * quality(*Sym), Sym);
+    }
+    std::sort(begin(Scores), end(Scores));
+    // FIXME(kbobyrev): Scoring all matched symbols and then shrinking it is
+    // rather expensive, this should be replaced by two-stage filtering as
+    // proposed in Dex design document.
+    if (More)
+      Scores.resize(Req.MaxCandidateCount);
+
+    for (const auto &ScoreSymbol : Scores)
+      Callback(*ScoreSymbol.second);
+  }
+  return More;
+}
+
+void DexIndex::lookup(const LookupRequest &Req,
+                      llvm::function_ref<void(const Symbol &)> Callback) const {
+  for (const auto &ID : Req.IDs) {
+    auto I = Index.find(ID);
+    if (I != Index.end())
+      Callback(*I->second);
+  }
+}
+
+std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
+  struct Snapshot {
+    SymbolSlab Slab;
+    std::vector<const Symbol *> Pointers;
+  };
+  auto Snap = std::make_shared<Snapshot>();
+  Snap->Slab = std::move(Slab);
+  for (auto &Sym : Snap->Slab)
+    Snap->Pointers.push_back(&Sym);
+  auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap),
+                                                        &Snap->Pointers);
+  auto DexIdx = llvm::make_unique<DexIndex>();
+  DexIdx->build(std::move(S));
+  return std::move(DexIdx);
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/MemIndex.h
===================================================================
--- clang-tools-extra/clangd/index/MemIndex.h
+++ clang-tools-extra/clangd/index/MemIndex.h
@@ -22,7 +22,7 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+  virtual void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
 
   /// \brief Build index from a symbol slab.
   static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
@@ -38,7 +38,6 @@
 private:
   std::shared_ptr<std::vector<const Symbol *>> Symbols;
   // Index is a set of symbols that are deduplicated by symbol IDs.
-  // FIXME: build smarter index structure.
   llvm::DenseMap<SymbolID, const Symbol *> Index;
   mutable std::mutex Mutex;
 };
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -43,8 +43,10 @@
   index/SymbolCollector.cpp
   index/SymbolYAML.cpp
 
+  index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
   index/dex/Trigram.cpp
+  index/dex/Token.cpp
 
   LINK_LIBS
   clangAST
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to