This revision was automatically updated to reflect the committed changes.
Closed by commit rL340175: [clangd] DexIndex implementation prototype (authored 
by omtcyfz, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D50337?vs=161480&id=161481#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D50337

Files:
  clang-tools-extra/trunk/clangd/CMakeLists.txt
  clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
  clang-tools-extra/trunk/clangd/index/dex/Token.h
  clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
  clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
  clang-tools-extra/trunk/unittests/clangd/TestIndex.cpp
  clang-tools-extra/trunk/unittests/clangd/TestIndex.h

Index: clang-tools-extra/trunk/unittests/clangd/TestIndex.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestIndex.cpp
+++ clang-tools-extra/trunk/unittests/clangd/TestIndex.cpp
@@ -0,0 +1,83 @@
+//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestIndex.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName) {
+  Symbol Sym;
+  Sym.ID = SymbolID(QName.str());
+  size_t Pos = QName.rfind("::");
+  if (Pos == llvm::StringRef::npos) {
+    Sym.Name = QName;
+    Sym.Scope = "";
+  } else {
+    Sym.Name = QName.substr(Pos + 2);
+    Sym.Scope = QName.substr(0, Pos + 2);
+  }
+  return Sym;
+}
+
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  SymbolSlab::Builder Slab;
+  for (llvm::StringRef QName : QualifiedNames)
+    Slab.insert(symbol(QName));
+
+  auto Storage = std::make_shared<SlabAndPointers>();
+  Storage->Slab = std::move(Slab).build();
+  for (const auto &Sym : Storage->Slab)
+    Storage->Pointers.push_back(&Sym);
+  if (WeakSymbols)
+    *WeakSymbols = Storage;
+  auto *Pointers = &Storage->Pointers;
+  return {std::move(Storage), Pointers};
+}
+
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  std::vector<std::string> Names;
+  for (int i = Begin; i <= End; i++)
+    Names.push_back(std::to_string(i));
+  return generateSymbols(Names, WeakSymbols);
+}
+
+std::string getQualifiedName(const Symbol &Sym) {
+  return (Sym.Scope + Sym.Name).str();
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req, bool *Incomplete) {
+  std::vector<std::string> Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+    Matches.push_back(clang::clangd::getQualifiedName(Sym));
+  });
+  if (Incomplete)
+    *Incomplete = IsIncomplete;
+  return Matches;
+}
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs) {
+  LookupRequest Req;
+  Req.IDs.insert(IDs.begin(), IDs.end());
+  std::vector<std::string> Results;
+  I.lookup(Req, [&](const Symbol &Sym) {
+    Results.push_back(getQualifiedName(Sym));
+  });
+  return Results;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "TestIndex.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
 #include "index/dex/Iterator.h"
 #include "index/dex/Token.h"
 #include "index/dex/Trigram.h"
@@ -17,11 +21,13 @@
 #include <string>
 #include <vector>
 
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+
 namespace clang {
 namespace clangd {
 namespace dex {
-
-using ::testing::ElementsAre;
+namespace {
 
 TEST(DexIndexIterators, DocumentIterator) {
   const PostingList L = {4, 7, 8, 20, 42, 100};
@@ -359,6 +365,175 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexIndex, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+TEST(DexIndex, FuzzyFind) {
+  DexIndex Index;
+  Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
+                               "other::ABC", "other::A"}));
+  FuzzyFindRequest Req;
+  Req.Query = "ABC";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
+  Req.Scopes = {"ns::", "ns::nested::"};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+  Req.Query = "A";
+  Req.Scopes = {"other::"};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("other::A", "other::ABC"));
+  Req.Query = "";
+  Req.Scopes = {};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("ns::ABC", "ns::BCD", "::ABC",
+                                   "ns::nested::ABC", "other::ABC",
+                                   "other::A"));
+}
+
+TEST(DexIndexTest, FuzzyMatchQ) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, DexIndexSymbolsRecycled) {
+  DexIndex I;
+  std::weak_ptr<SlabAndPointers> Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  EXPECT_FALSE(Symbols.expired());
+  // Release old symbols.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+// FIXME(kbobyrev): This test is different for DexIndex and MemIndex: while
+// MemIndex manages response deduplication, DexIndex simply returns all matched
+// symbols which means there might be equivalent symbols in the response.
+// Before drop-in replacement of MemIndex with DexIndex happens, FileIndex
+// should handle deduplication instead.
+TEST(DexIndexTest, DexIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject duplicates.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  DexIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 4u);
+}
+
+TEST(DexIndexTest, DexIndexLimitedNumMatches) {
+  DexIndex I;
+  I.build(generateNumSymbols(0, 100));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  bool Incomplete;
+  auto Matches = match(I, Req, &Incomplete);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+  EXPECT_TRUE(Incomplete);
+}
+
+TEST(DexIndexTest, FuzzyMatch) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {""};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::", "b::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
+}
+
+TEST(DexIndexTest, NoMatchNestedScopes) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::b::y2"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
+}
+
+TEST(DexIndexTest, IgnoreCases) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::ABC", "ns::abc"}));
+  FuzzyFindRequest Req;
+  Req.Query = "AB";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
+}
+
+TEST(DexIndexTest, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+} // namespace
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
@@ -7,33 +7,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "TestIndex.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-using testing::UnorderedElementsAre;
 using testing::Pointee;
+using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 namespace {
 
-Symbol symbol(llvm::StringRef QName) {
-  Symbol Sym;
-  Sym.ID = SymbolID(QName.str());
-  size_t Pos = QName.rfind("::");
-  if (Pos == llvm::StringRef::npos) {
-    Sym.Name = QName;
-    Sym.Scope = "";
-  } else {
-    Sym.Name = QName.substr(Pos + 2);
-    Sym.Scope = QName.substr(0, Pos + 2);
-  }
-  return Sym;
-}
-
 MATCHER_P(Named, N, "") { return arg.Name == N; }
 
 TEST(SymbolSlab, FindAndIterate) {
@@ -52,59 +39,6 @@
     EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
 }
 
-struct SlabAndPointers {
-  SymbolSlab Slab;
-  std::vector<const Symbol *> Pointers;
-};
-
-// Create a slab of symbols with the given qualified names as both IDs and
-// names. The life time of the slab is managed by the returned shared pointer.
-// If \p WeakSymbols is provided, it will be pointed to the managed object in
-// the returned shared pointer.
-std::shared_ptr<std::vector<const Symbol *>>
-generateSymbols(std::vector<std::string> QualifiedNames,
-                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  SymbolSlab::Builder Slab;
-  for (llvm::StringRef QName : QualifiedNames)
-    Slab.insert(symbol(QName));
-
-  auto Storage = std::make_shared<SlabAndPointers>();
-  Storage->Slab = std::move(Slab).build();
-  for (const auto &Sym : Storage->Slab)
-    Storage->Pointers.push_back(&Sym);
-  if (WeakSymbols)
-    *WeakSymbols = Storage;
-  auto *Pointers = &Storage->Pointers;
-  return {std::move(Storage), Pointers};
-}
-
-// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
-// to the `generateSymbols` above.
-std::shared_ptr<std::vector<const Symbol *>>
-generateNumSymbols(int Begin, int End,
-                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  std::vector<std::string> Names;
-  for (int i = Begin; i <= End; i++)
-    Names.push_back(std::to_string(i));
-  return generateSymbols(Names, WeakSymbols);
-}
-
-std::string getQualifiedName(const Symbol &Sym) {
-  return (Sym.Scope + Sym.Name).str();
-}
-
-std::vector<std::string> match(const SymbolIndex &I,
-                               const FuzzyFindRequest &Req,
-                               bool *Incomplete = nullptr) {
-  std::vector<std::string> Matches;
-  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
-    Matches.push_back(getQualifiedName(Sym));
-  });
-  if (Incomplete)
-    *Incomplete = IsIncomplete;
-  return Matches;
-}
-
 TEST(MemIndexTest, MemIndexSymbolsRecycled) {
   MemIndex I;
   std::weak_ptr<SlabAndPointers> Symbols;
@@ -212,18 +146,6 @@
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
-// Returns qualified names of symbols with any of IDs in the index.
-std::vector<std::string> lookup(const SymbolIndex &I,
-                                llvm::ArrayRef<SymbolID> IDs) {
-  LookupRequest Req;
-  Req.IDs.insert(IDs.begin(), IDs.end());
-  std::vector<std::string> Results;
-  I.lookup(Req, [&](const Symbol &Sym) {
-    Results.push_back(getQualifiedName(Sym));
-  });
-  return Results;
-}
-
 TEST(MemIndexTest, Lookup) {
   MemIndex I;
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
@@ -269,7 +191,7 @@
 TEST(MergeTest, Merge) {
   Symbol L, R;
   L.ID = R.ID = SymbolID("hello");
-  L.Name = R.Name = "Foo";                    // same in both
+  L.Name = R.Name = "Foo";                           // same in both
   L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs
   R.CanonicalDeclaration.FileURI = "file:///right.h";
   L.References = 1;
Index: clang-tools-extra/trunk/unittests/clangd/TestIndex.h
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestIndex.h
+++ clang-tools-extra/trunk/unittests/clangd/TestIndex.h
@@ -0,0 +1,64 @@
+//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+
+namespace clang {
+namespace clangd {
+
+// Creates Symbol instance and sets SymbolID to given QualifiedName.
+Symbol symbol(llvm::StringRef QName);
+
+// Bundles symbol pointers with the actual symbol slab the pointers refer to in
+// order to ensure that the slab isn't destroyed while it's used by and index.
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector<const Symbol *> Pointers;
+};
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Returns fully-qualified name out of given symbol.
+std::string getQualifiedName(const Symbol &Sym);
+
+// Performs fuzzy matching-based symbol lookup given a query and an index.
+// Incomplete is set true if more items than requested can be retrieved, false
+// otherwise.
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req,
+                               bool *Incomplete = nullptr);
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
@@ -30,6 +30,7 @@
   SyncAPI.cpp
   TUSchedulerTests.cpp
   TestFS.cpp
+  TestIndex.cpp
   TestTU.cpp
   ThreadingTests.cpp
   TraceTests.cpp
Index: clang-tools-extra/trunk/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt
@@ -43,6 +43,7 @@
   index/SymbolCollector.cpp
   index/SymbolYAML.cpp
 
+  index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
   index/dex/Trigram.cpp
 
Index: clang-tools-extra/trunk/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Token.h
+++ clang-tools-extra/trunk/clangd/index/dex/Token.h
@@ -22,9 +22,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 
+#include "../Index.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <string>
 #include <vector>
 
Index: clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
===================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
@@ -0,0 +1,76 @@
+//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines Dex - a symbol index implementation based on query iterators
+// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
+// While consuming more memory and having longer build stage due to
+// preprocessing, Dex will have substantially lower latency. It will also allow
+// efficient symbol searching which is crucial for operations like code
+// completion, and can be very important for a number of different code
+// transformations which will be eventually supported by Clangd.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+
+#include "../Index.h"
+#include "../MemIndex.h"
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex trigram-based index implementation.
+// FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
+// index so that it can be loaded from the disk. Since static index is not
+// changed frequently, it's safe to assume that it has to be built only once
+// (when the clangd process starts). Therefore, it can be easier to store built
+// index on disk and then load it if available.
+class DexIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
+  /// accessible as long as `Symbols` is kept alive.
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+            llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  void lookup(const LookupRequest &Req,
+              llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override;
+
+private:
+  mutable std::mutex Mutex;
+
+  std::shared_ptr<std::vector<const Symbol *>> Symbols /*GUARDED_BY(Mutex)*/;
+  llvm::DenseMap<SymbolID, const Symbol *> LookupTable /*GUARDED_BY(Mutex)*/;
+  llvm::DenseMap<const Symbol *, float> SymbolQuality /*GUARDED_BY(Mutex)*/;
+  // Inverted index is a mapping from the search token to the posting list,
+  // which contains all items which can be characterized by such search token.
+  // For example, if the search token is scope "std::", the corresponding
+  // posting list would contain all indices of symbols defined in namespace std.
+  // Inverted index is used to retrieve posting lists which are processed during
+  // the fuzzyFind process.
+  llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
@@ -0,0 +1,167 @@
+//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DexIndex.h"
+#include "../../FuzzyMatch.h"
+#include "../../Logger.h"
+#include <algorithm>
+#include <queue>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+// Returns the tokens which are given symbol's characteristics. Currently, the
+// generated tokens only contain fuzzy matching trigrams and symbol's scope,
+// but in the future this will also return path proximity tokens and other
+// types of tokens such as symbol type (if applicable).
+// Returns the tokens which are given symbols's characteristics. For example,
+// trigrams and scopes.
+// FIXME(kbobyrev): Support more token types:
+// * Path proximity
+// * Types
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+  std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+  Result.push_back(Token(Token::Kind::Scope, Sym.Scope));
+  return Result;
+}
+
+} // namespace
+
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+  llvm::DenseMap<SymbolID, const Symbol *> TempLookupTable;
+  llvm::DenseMap<const Symbol *, float> TempSymbolQuality;
+  for (const Symbol *Sym : *Syms) {
+    TempLookupTable[Sym->ID] = Sym;
+    TempSymbolQuality[Sym] = quality(*Sym);
+  }
+
+  // Symbols are sorted by symbol qualities so that items in the posting lists
+  // are stored in the descending order of symbol quality.
+  std::sort(begin(*Syms), end(*Syms),
+            [&](const Symbol *LHS, const Symbol *RHS) {
+              return TempSymbolQuality[LHS] > TempSymbolQuality[RHS];
+            });
+  llvm::DenseMap<Token, PostingList> TempInvertedIndex;
+  // Populate TempInvertedIndex with posting lists for index symbols.
+  for (DocID SymbolRank = 0; SymbolRank < Syms->size(); ++SymbolRank) {
+    const auto *Sym = (*Syms)[SymbolRank];
+    for (const auto &Token : generateSearchTokens(*Sym))
+      TempInvertedIndex[Token].push_back(SymbolRank);
+  }
+
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+
+    // Replace outdated index with the new one.
+    LookupTable = std::move(TempLookupTable);
+    Symbols = std::move(Syms);
+    InvertedIndex = std::move(TempInvertedIndex);
+    SymbolQuality = std::move(TempSymbolQuality);
+  }
+}
+
+/// Constructs iterators over tokens extracted from the query and exhausts it
+/// while applying Callback to each symbol in the order of decreasing quality
+/// of the matched symbols.
+bool DexIndex::fuzzyFind(
+    const FuzzyFindRequest &Req,
+    llvm::function_ref<void(const Symbol &)> Callback) const {
+  assert(!StringRef(Req.Query).contains("::") &&
+         "There must be no :: in query.");
+  FuzzyMatcher Filter(Req.Query);
+  bool More = false;
+
+  std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+  const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+
+    // Generate query trigrams and construct AND iterator over all query
+    // trigrams.
+    std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+    for (const auto &Trigram : TrigramTokens) {
+      const auto It = InvertedIndex.find(Trigram);
+      if (It != InvertedIndex.end())
+        TrigramIterators.push_back(create(It->second));
+    }
+    if (!TrigramIterators.empty())
+      TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+    // Generate scope tokens for search query.
+    std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+    for (const auto &Scope : Req.Scopes) {
+      const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+      if (It != InvertedIndex.end())
+        ScopeIterators.push_back(create(It->second));
+    }
+    // Add OR iterator for scopes if there are any Scope Iterators.
+    if (!ScopeIterators.empty())
+      TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+    // Use TRUE iterator if both trigrams and scopes from the query are not
+    // present in the symbol index.
+    auto QueryIterator = TopLevelChildren.empty()
+                             ? createTrue(Symbols->size())
+                             : createAnd(move(TopLevelChildren));
+    // Retrieve more items than it was requested: some of  the items with high
+    // final score might not be retrieved otherwise.
+    // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
+    // using 100x of the requested number might not be good in practice, e.g.
+    // when the requested number of items is small.
+    const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+    std::vector<DocID> SymbolDocIDs = consume(*QueryIterator, ItemsToRetrieve);
+
+    // Retrieve top Req.MaxCandidateCount items.
+    std::priority_queue<std::pair<float, const Symbol *>> Top;
+    for (const auto &SymbolDocID : SymbolDocIDs) {
+      const auto *Sym = (*Symbols)[SymbolDocID];
+      const llvm::Optional<float> Score = Filter.match(Sym->Name);
+      if (!Score)
+        continue;
+      // Multiply score by a negative factor so that Top stores items with the
+      // highest actual score.
+      Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
+      if (Top.size() > Req.MaxCandidateCount) {
+        More = true;
+        Top.pop();
+      }
+    }
+
+    // Apply callback to the top Req.MaxCandidateCount items.
+    for (; !Top.empty(); Top.pop())
+      Callback(*Top.top().second);
+  }
+
+  return More;
+}
+
+void DexIndex::lookup(const LookupRequest &Req,
+                      llvm::function_ref<void(const Symbol &)> Callback) const {
+  std::lock_guard<std::mutex> Lock(Mutex);
+  for (const auto &ID : Req.IDs) {
+    auto I = LookupTable.find(ID);
+    if (I != LookupTable.end())
+      Callback(*I->second);
+  }
+}
+
+
+void DexIndex::findOccurrences(
+    const OccurrencesRequest &Req,
+    llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
+  log("findOccurrences is not implemented.");
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to