kbobyrev updated this revision to Diff 159463.
kbobyrev added a comment.

Don't resize retrieved symbols vector, simply let callback process at most 
`MaxCandidateCount` items.


https://reviews.llvm.org/D50337

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/MemIndex.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/unittests/clangd/CMakeLists.txt
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.h
  clang-tools-extra/unittests/clangd/IndexTests.cpp

Index: clang-tools-extra/unittests/clangd/IndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/IndexTests.cpp
+++ clang-tools-extra/unittests/clangd/IndexTests.cpp
@@ -7,33 +7,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-using testing::UnorderedElementsAre;
 using testing::Pointee;
+using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 namespace {
 
-Symbol symbol(llvm::StringRef QName) {
-  Symbol Sym;
-  Sym.ID = SymbolID(QName.str());
-  size_t Pos = QName.rfind("::");
-  if (Pos == llvm::StringRef::npos) {
-    Sym.Name = QName;
-    Sym.Scope = "";
-  } else {
-    Sym.Name = QName.substr(Pos + 2);
-    Sym.Scope = QName.substr(0, Pos + 2);
-  }
-  return Sym;
-}
-
 MATCHER_P(Named, N, "") { return arg.Name == N; }
 
 TEST(SymbolSlab, FindAndIterate) {
@@ -52,59 +39,6 @@
     EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
 }
 
-struct SlabAndPointers {
-  SymbolSlab Slab;
-  std::vector<const Symbol *> Pointers;
-};
-
-// Create a slab of symbols with the given qualified names as both IDs and
-// names. The life time of the slab is managed by the returned shared pointer.
-// If \p WeakSymbols is provided, it will be pointed to the managed object in
-// the returned shared pointer.
-std::shared_ptr<std::vector<const Symbol *>>
-generateSymbols(std::vector<std::string> QualifiedNames,
-                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  SymbolSlab::Builder Slab;
-  for (llvm::StringRef QName : QualifiedNames)
-    Slab.insert(symbol(QName));
-
-  auto Storage = std::make_shared<SlabAndPointers>();
-  Storage->Slab = std::move(Slab).build();
-  for (const auto &Sym : Storage->Slab)
-    Storage->Pointers.push_back(&Sym);
-  if (WeakSymbols)
-    *WeakSymbols = Storage;
-  auto *Pointers = &Storage->Pointers;
-  return {std::move(Storage), Pointers};
-}
-
-// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
-// to the `generateSymbols` above.
-std::shared_ptr<std::vector<const Symbol *>>
-generateNumSymbols(int Begin, int End,
-                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  std::vector<std::string> Names;
-  for (int i = Begin; i <= End; i++)
-    Names.push_back(std::to_string(i));
-  return generateSymbols(Names, WeakSymbols);
-}
-
-std::string getQualifiedName(const Symbol &Sym) {
-  return (Sym.Scope + Sym.Name).str();
-}
-
-std::vector<std::string> match(const SymbolIndex &I,
-                               const FuzzyFindRequest &Req,
-                               bool *Incomplete = nullptr) {
-  std::vector<std::string> Matches;
-  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
-    Matches.push_back(getQualifiedName(Sym));
-  });
-  if (Incomplete)
-    *Incomplete = IsIncomplete;
-  return Matches;
-}
-
 TEST(MemIndexTest, MemIndexSymbolsRecycled) {
   MemIndex I;
   std::weak_ptr<SlabAndPointers> Symbols;
@@ -212,18 +146,6 @@
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
-// Returns qualified names of symbols with any of IDs in the index.
-std::vector<std::string> lookup(const SymbolIndex &I,
-                                llvm::ArrayRef<SymbolID> IDs) {
-  LookupRequest Req;
-  Req.IDs.insert(IDs.begin(), IDs.end());
-  std::vector<std::string> Results;
-  I.lookup(Req, [&](const Symbol &Sym) {
-    Results.push_back(getQualifiedName(Sym));
-  });
-  return Results;
-}
-
 TEST(MemIndexTest, Lookup) {
   MemIndex I;
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
@@ -269,7 +191,7 @@
 TEST(MergeTest, Merge) {
   Symbol L, R;
   L.ID = R.ID = SymbolID("hello");
-  L.Name = R.Name = "Foo";                    // same in both
+  L.Name = R.Name = "Foo";                           // same in both
   L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs
   R.CanonicalDeclaration.FileURI = "file:///right.h";
   L.References = 1;
Index: clang-tools-extra/unittests/clangd/IndexHelpers.h
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.h
@@ -0,0 +1,57 @@
+//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName);
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector<const Symbol *> Pointers;
+};
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+std::string getQualifiedName(const Symbol &Sym);
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req,
+                               bool *Incomplete = nullptr);
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/unittests/clangd/IndexHelpers.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.cpp
@@ -0,0 +1,89 @@
+//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexHelpers.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName) {
+  Symbol Sym;
+  Sym.ID = SymbolID(QName.str());
+  size_t Pos = QName.rfind("::");
+  if (Pos == llvm::StringRef::npos) {
+    Sym.Name = QName;
+    Sym.Scope = "";
+  } else {
+    Sym.Name = QName.substr(Pos + 2);
+    Sym.Scope = QName.substr(0, Pos + 2);
+  }
+  return Sym;
+}
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  SymbolSlab::Builder Slab;
+  for (llvm::StringRef QName : QualifiedNames)
+    Slab.insert(symbol(QName));
+
+  auto Storage = std::make_shared<SlabAndPointers>();
+  Storage->Slab = std::move(Slab).build();
+  for (const auto &Sym : Storage->Slab)
+    Storage->Pointers.push_back(&Sym);
+  if (WeakSymbols)
+    *WeakSymbols = Storage;
+  auto *Pointers = &Storage->Pointers;
+  return {std::move(Storage), Pointers};
+}
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  std::vector<std::string> Names;
+  for (int i = Begin; i <= End; i++)
+    Names.push_back(std::to_string(i));
+  return generateSymbols(Names, WeakSymbols);
+}
+
+std::string getQualifiedName(const Symbol &Sym) {
+  return (Sym.Scope + Sym.Name).str();
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req, bool *Incomplete) {
+  std::vector<std::string> Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+    Matches.push_back(clang::clangd::getQualifiedName(Sym));
+  });
+  if (Incomplete)
+    *Incomplete = IsIncomplete;
+  return Matches;
+}
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs) {
+  LookupRequest Req;
+  Req.IDs.insert(IDs.begin(), IDs.end());
+  std::vector<std::string> Results;
+  I.lookup(Req, [&](const Symbol &Sym) {
+    Results.push_back(getQualifiedName(Sym));
+  });
+  return Results;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
 #include "index/dex/Iterator.h"
 #include "index/dex/Token.h"
 #include "index/dex/Trigram.h"
@@ -17,11 +21,13 @@
 #include <string>
 #include <vector>
 
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+
 namespace clang {
 namespace clangd {
 namespace dex {
-
-using ::testing::ElementsAre;
+namespace {
 
 TEST(DexIndexIterators, DocumentIterator) {
   const PostingList L = {4, 7, 8, 20, 42, 100};
@@ -312,6 +318,69 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexIndex, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Use 3+ symbols long names so that trigram index is used.
+TEST(MergeDexIndex, Lookup) {
+  DexIndex I, J;
+  I.build(generateSymbols({"ns::A", "ns::B"}));
+  J.build(generateSymbols({"ns::B", "ns::C"}));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")),
+              UnorderedElementsAre("ns::A"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")),
+              UnorderedElementsAre("ns::B"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")),
+              UnorderedElementsAre("ns::C"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}),
+      UnorderedElementsAre("ns::A", "ns::B"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}),
+      UnorderedElementsAre("ns::A", "ns::C"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")),
+              UnorderedElementsAre());
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Add more tests on DexIndex? Right now, it's mostly a wrapper
+// around MemIndex, adopting tests from IndexTests.cpp sounds reasonable.
+// However, most of these tests use short names (<3 symbols) for symbol lookups,
+// which currently are dispatched to MemIndex and hence just duplicating these
+// tests while substituting MemIndex with DexIndex is not a viable solution.
+TEST(DexIndex, FuzzyFind) {
+  DexIndex Index;
+  Index.build(generateSymbols(
+      {"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC"}));
+  FuzzyFindRequest Req;
+  Req.Query = "ABC";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
+  Req.Scopes = {"ns::", "ns::nested::"};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+}
+
+TEST(DexIndexTest, FuzzyMatch) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+} // namespace
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/unittests/clangd/CMakeLists.txt
@@ -23,6 +23,7 @@
   FuzzyMatchTests.cpp
   GlobalCompilationDatabaseTests.cpp
   HeadersTests.cpp
+  IndexHelpers.cpp
   IndexTests.cpp
   QualityTests.cpp
   SourceCodeTests.cpp
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -22,9 +22,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 
+#include "../Index.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <string>
 #include <vector>
 
@@ -81,6 +81,8 @@
   }
 };
 
+std::vector<Token> generateSearchTokens(const Symbol &Sym);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/Token.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/Token.cpp
@@ -0,0 +1,25 @@
+//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+#include "Trigram.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+  std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+  Result.push_back(Token(Token::Kind::Scope, Sym.Scope));
+  return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -0,0 +1,53 @@
+//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+
+#include "../Index.h"
+#include "../MemIndex.h"
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex index implementation.
+class DexIndex : public SymbolIndex {
+public:
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  // FIXME(kbobyrev): This is the same as in MemIndex. Should I abstract this
+  // out?
+  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
+
+  bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+            llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  // FIXME(kbobyrev): Symbol lookup is also exactly the same as in MemIndex.
+  virtual void
+  lookup(const LookupRequest &Req,
+         llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+private:
+  std::shared_ptr<std::vector<const Symbol *>> Symbols;
+  llvm::DenseMap<SymbolID, const Symbol *> Index;
+  mutable std::mutex Mutex;
+  llvm::DenseMap<Token, PostingList> InvertedIndex;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -0,0 +1,140 @@
+//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DexIndex.h"
+#include "../../FuzzyMatch.h"
+#include <algorithm>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+  for (const Symbol *Sym : *Syms)
+    TempIndex[Sym->ID] = Sym;
+
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    Index = std::move(TempIndex);
+    Symbols = std::move(Syms);
+    std::sort(begin(*Symbols), end(*Symbols),
+              [](const Symbol *LHS, const Symbol *RHS) {
+                return quality(*LHS) > quality(*RHS);
+              });
+    InvertedIndex.clear();
+    for (DocID SymbolRank = 0; SymbolRank < Symbols->size(); ++SymbolRank) {
+      const auto Sym = (*Symbols)[SymbolRank];
+      for (const auto &Token : generateSearchTokens(*Sym)) {
+        if (!InvertedIndex.count(Token))
+          InvertedIndex[Token] = {SymbolRank};
+        else
+          InvertedIndex[Token].push_back(SymbolRank);
+      }
+    }
+  }
+}
+
+bool DexIndex::fuzzyFind(
+    const FuzzyFindRequest &Req,
+    llvm::function_ref<void(const Symbol &)> Callback) const {
+  assert(!StringRef(Req.Query).contains("::") &&
+         "There must be no :: in query.");
+  // FIXME(kbobyrev): Currently, only "long" (3+ symbols) requests are
+  // processed. After tokens are generated for the short queries and index
+  // symbols, this shouldn't be the case.
+  assert(Req.Query.size() >= 3 &&
+         "DexIndex is currently not capable of processing queries of size <3.");
+
+  FuzzyMatcher Filter(Req.Query);
+  bool More;
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+
+    // Generate query trigrams and construct AND iterator over all query
+    // trigrams.
+    const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+    std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+    for (const auto &Trigram : TrigramTokens) {
+      const auto It = InvertedIndex.find(Trigram);
+      if (It != InvertedIndex.end())
+        TrigramIterators.push_back(create(It->second));
+    }
+
+    std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+    TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+    std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+    for (const auto &Scope : Req.Scopes) {
+      const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+      if (It != InvertedIndex.end())
+        ScopeIterators.push_back(create(It->second));
+    }
+    // Only add OR iterator for scopes if the request contains scopes, otherwise
+    // the iterator won't have any children and will be invalid.
+    if (ScopeIterators.size())
+      TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+    auto QueryIterator = createAnd(move(TopLevelChildren));
+    // FIXME(kbobyrev): Limit the total number of consumed symbols using
+    // Req.MaxCandidateCount. That would require implementing Limit iterator.
+    auto SymbolDocIDs = consume(*QueryIterator);
+
+    More = SymbolDocIDs.size() > Req.MaxCandidateCount;
+
+    // FIXME(kbobyrev): Scoring all matched symbols and then processing
+    // MaxCandidateCount items is rather expensive, this should be replaced by
+    // two-stage filtering as proposed in Dex design document.
+    std::vector<std::pair<float, const Symbol *>> Scores(SymbolDocIDs.size());
+    for (size_t SymbolIdx = 0; SymbolIdx < SymbolDocIDs.size(); ++SymbolIdx) {
+      const auto Sym = (*Symbols)[SymbolDocIDs[SymbolIdx]];
+      const auto Score = Filter.match(Sym->Name);
+      assert(Score.hasValue() && "Matched symbol has all Fuzzy Matching "
+                                 "trigrams, it should match the query");
+      Scores[SymbolIdx] = std::make_pair(-*Score * quality(*Sym), Sym);
+    }
+    // Sort retrieved symbol by Fuzzy Matching score.
+    std::sort(begin(Scores), end(Scores));
+
+    for (size_t CandidateRank = 0;
+         CandidateRank < std::min(Req.MaxCandidateCount, Scores.size());
+         ++CandidateRank)
+      Callback(*Scores[CandidateRank].second);
+  }
+  return More;
+}
+
+void DexIndex::lookup(const LookupRequest &Req,
+                      llvm::function_ref<void(const Symbol &)> Callback) const {
+  for (const auto &ID : Req.IDs) {
+    auto I = Index.find(ID);
+    if (I != Index.end())
+      Callback(*I->second);
+  }
+}
+
+std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
+  struct Snapshot {
+    SymbolSlab Slab;
+    std::vector<const Symbol *> Pointers;
+  };
+  auto Snap = std::make_shared<Snapshot>();
+  Snap->Slab = std::move(Slab);
+  for (auto &Sym : Snap->Slab)
+    Snap->Pointers.push_back(&Sym);
+  auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap),
+                                                        &Snap->Pointers);
+  auto DexIdx = llvm::make_unique<DexIndex>();
+  DexIdx->build(std::move(S));
+  return std::move(DexIdx);
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/MemIndex.h
===================================================================
--- clang-tools-extra/clangd/index/MemIndex.h
+++ clang-tools-extra/clangd/index/MemIndex.h
@@ -22,7 +22,7 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+  virtual void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
 
   /// \brief Build index from a symbol slab.
   static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
@@ -38,7 +38,6 @@
 private:
   std::shared_ptr<std::vector<const Symbol *>> Symbols;
   // Index is a set of symbols that are deduplicated by symbol IDs.
-  // FIXME: build smarter index structure.
   llvm::DenseMap<SymbolID, const Symbol *> Index;
   mutable std::mutex Mutex;
 };
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -43,8 +43,10 @@
   index/SymbolCollector.cpp
   index/SymbolYAML.cpp
 
+  index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
   index/dex/Trigram.cpp
+  index/dex/Token.cpp
 
   LINK_LIBS
   clangAST
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to