kbobyrev created this revision. kbobyrev added reviewers: ioeric, ilya-biryukov. kbobyrev added a project: clang-tools-extra. Herald added subscribers: arphaman, mgrang, jkorous, MaskRay, mgorny.
This patch is a proof-of-concept Dex index implementation. It has several flaws, which don't allow replacing static MemIndex yet, such as: - Not being able to handle queries of small size (less than 3 symbols); a way to solve this is generating trigrams of smaller size and having such incomplete trigrams in the index structure. - Speed measurements: while manually editing files in Vim and requesting autocompletion gives an impression that the performance is at least comparable with the current static index, having actual numbers is important because we don't want to hurt the users and roll out slow code. Eric (@ioeric) suggested that we should only replace MemIndex as soon as we have the evidence that this is not a regression in terms of performance. An approach which is likely to be successful here is to wait until we have benchmark library in the LLVM core repository, which is something I have suggested in the LLVM mailing lists, received positive feedback on and started working on. I will add a dependency as soon as the suggested patch is out for a review (currently there's at least one complication which is being addressed by https://github.com/google/benchmark/pull/649). Key performance improvements for iterators are sorting by cost and the limit iterator. - Quality measurements: currently, boosting iterator and two-phase lookup stage are not implemented, without these the quality is likely to be worse than the current implementation can yield. Measuring quality is tricky, but another suggestion in the offline discussion was that the drop-in replacement should only happen after Boosting iterators implementation (and subsequent query enhancement). The proposed changes do not affect Clangd functionality or performance, `DexIndex` is only used in unit tests and not in production code. https://reviews.llvm.org/D50337 Files: clang-tools-extra/clangd/CMakeLists.txt clang-tools-extra/clangd/index/MemIndex.h clang-tools-extra/clangd/index/dex/DexIndex.cpp clang-tools-extra/clangd/index/dex/DexIndex.h clang-tools-extra/clangd/index/dex/Token.cpp clang-tools-extra/clangd/index/dex/Token.h clang-tools-extra/unittests/clangd/CMakeLists.txt clang-tools-extra/unittests/clangd/DexIndexTests.cpp clang-tools-extra/unittests/clangd/IndexHelpers.cpp clang-tools-extra/unittests/clangd/IndexHelpers.h clang-tools-extra/unittests/clangd/IndexTests.cpp
Index: clang-tools-extra/unittests/clangd/IndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/IndexTests.cpp +++ clang-tools-extra/unittests/clangd/IndexTests.cpp @@ -7,33 +7,20 @@ // //===----------------------------------------------------------------------===// +#include "IndexHelpers.h" #include "index/Index.h" #include "index/MemIndex.h" #include "index/Merge.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -using testing::UnorderedElementsAre; using testing::Pointee; +using testing::UnorderedElementsAre; namespace clang { namespace clangd { namespace { -Symbol symbol(llvm::StringRef QName) { - Symbol Sym; - Sym.ID = SymbolID(QName.str()); - size_t Pos = QName.rfind("::"); - if (Pos == llvm::StringRef::npos) { - Sym.Name = QName; - Sym.Scope = ""; - } else { - Sym.Name = QName.substr(Pos + 2); - Sym.Scope = QName.substr(0, Pos + 2); - } - return Sym; -} - MATCHER_P(Named, N, "") { return arg.Name == N; } TEST(SymbolSlab, FindAndIterate) { @@ -52,59 +39,6 @@ EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym)); } -struct SlabAndPointers { - SymbolSlab Slab; - std::vector<const Symbol *> Pointers; -}; - -// Create a slab of symbols with the given qualified names as both IDs and -// names. The life time of the slab is managed by the returned shared pointer. -// If \p WeakSymbols is provided, it will be pointed to the managed object in -// the returned shared pointer. -std::shared_ptr<std::vector<const Symbol *>> -generateSymbols(std::vector<std::string> QualifiedNames, - std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) { - SymbolSlab::Builder Slab; - for (llvm::StringRef QName : QualifiedNames) - Slab.insert(symbol(QName)); - - auto Storage = std::make_shared<SlabAndPointers>(); - Storage->Slab = std::move(Slab).build(); - for (const auto &Sym : Storage->Slab) - Storage->Pointers.push_back(&Sym); - if (WeakSymbols) - *WeakSymbols = Storage; - auto *Pointers = &Storage->Pointers; - return {std::move(Storage), Pointers}; -} - -// Create a slab of symbols with IDs and names [Begin, End], otherwise identical -// to the `generateSymbols` above. -std::shared_ptr<std::vector<const Symbol *>> -generateNumSymbols(int Begin, int End, - std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) { - std::vector<std::string> Names; - for (int i = Begin; i <= End; i++) - Names.push_back(std::to_string(i)); - return generateSymbols(Names, WeakSymbols); -} - -std::string getQualifiedName(const Symbol &Sym) { - return (Sym.Scope + Sym.Name).str(); -} - -std::vector<std::string> match(const SymbolIndex &I, - const FuzzyFindRequest &Req, - bool *Incomplete = nullptr) { - std::vector<std::string> Matches; - bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) { - Matches.push_back(getQualifiedName(Sym)); - }); - if (Incomplete) - *Incomplete = IsIncomplete; - return Matches; -} - TEST(MemIndexTest, MemIndexSymbolsRecycled) { MemIndex I; std::weak_ptr<SlabAndPointers> Symbols; @@ -212,18 +146,6 @@ EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc")); } -// Returns qualified names of symbols with any of IDs in the index. -std::vector<std::string> lookup(const SymbolIndex &I, - llvm::ArrayRef<SymbolID> IDs) { - LookupRequest Req; - Req.IDs.insert(IDs.begin(), IDs.end()); - std::vector<std::string> Results; - I.lookup(Req, [&](const Symbol &Sym) { - Results.push_back(getQualifiedName(Sym)); - }); - return Results; -} - TEST(MemIndexTest, Lookup) { MemIndex I; I.build(generateSymbols({"ns::abc", "ns::xyz"})); @@ -269,7 +191,7 @@ TEST(MergeTest, Merge) { Symbol L, R; L.ID = R.ID = SymbolID("hello"); - L.Name = R.Name = "Foo"; // same in both + L.Name = R.Name = "Foo"; // same in both L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs R.CanonicalDeclaration.FileURI = "file:///right.h"; L.References = 1; Index: clang-tools-extra/unittests/clangd/IndexHelpers.h =================================================================== --- /dev/null +++ clang-tools-extra/unittests/clangd/IndexHelpers.h @@ -0,0 +1,57 @@ +//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H +#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H + +#include "index/Index.h" +#include "index/Merge.h" +#include "index/dex/DexIndex.h" +#include "index/dex/Iterator.h" +#include "index/dex/Token.h" +#include "index/dex/Trigram.h" + +namespace clang { +namespace clangd { + +Symbol symbol(llvm::StringRef QName); + +struct SlabAndPointers { + SymbolSlab Slab; + std::vector<const Symbol *> Pointers; +}; + +// Create a slab of symbols with the given qualified names as both IDs and +// names. The life time of the slab is managed by the returned shared pointer. +// If \p WeakSymbols is provided, it will be pointed to the managed object in +// the returned shared pointer. +std::shared_ptr<std::vector<const Symbol *>> +generateSymbols(std::vector<std::string> QualifiedNames, + std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr); + +// Create a slab of symbols with IDs and names [Begin, End], otherwise identical +// to the `generateSymbols` above. +std::shared_ptr<std::vector<const Symbol *>> +generateNumSymbols(int Begin, int End, + std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr); + +std::string getQualifiedName(const Symbol &Sym); + +std::vector<std::string> match(const SymbolIndex &I, + const FuzzyFindRequest &Req, + bool *Incomplete = nullptr); + +// Returns qualified names of symbols with any of IDs in the index. +std::vector<std::string> lookup(const SymbolIndex &I, + llvm::ArrayRef<SymbolID> IDs); + +} // namespace clangd +} // namespace clang + +#endif Index: clang-tools-extra/unittests/clangd/IndexHelpers.cpp =================================================================== --- /dev/null +++ clang-tools-extra/unittests/clangd/IndexHelpers.cpp @@ -0,0 +1,89 @@ +//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "IndexHelpers.h" + +namespace clang { +namespace clangd { + +Symbol symbol(llvm::StringRef QName) { + Symbol Sym; + Sym.ID = SymbolID(QName.str()); + size_t Pos = QName.rfind("::"); + if (Pos == llvm::StringRef::npos) { + Sym.Name = QName; + Sym.Scope = ""; + } else { + Sym.Name = QName.substr(Pos + 2); + Sym.Scope = QName.substr(0, Pos + 2); + } + return Sym; +} + +// Create a slab of symbols with the given qualified names as both IDs and +// names. The life time of the slab is managed by the returned shared pointer. +// If \p WeakSymbols is provided, it will be pointed to the managed object in +// the returned shared pointer. +std::shared_ptr<std::vector<const Symbol *>> +generateSymbols(std::vector<std::string> QualifiedNames, + std::weak_ptr<SlabAndPointers> *WeakSymbols) { + SymbolSlab::Builder Slab; + for (llvm::StringRef QName : QualifiedNames) + Slab.insert(symbol(QName)); + + auto Storage = std::make_shared<SlabAndPointers>(); + Storage->Slab = std::move(Slab).build(); + for (const auto &Sym : Storage->Slab) + Storage->Pointers.push_back(&Sym); + if (WeakSymbols) + *WeakSymbols = Storage; + auto *Pointers = &Storage->Pointers; + return {std::move(Storage), Pointers}; +} + +// Create a slab of symbols with IDs and names [Begin, End], otherwise identical +// to the `generateSymbols` above. +std::shared_ptr<std::vector<const Symbol *>> +generateNumSymbols(int Begin, int End, + std::weak_ptr<SlabAndPointers> *WeakSymbols) { + std::vector<std::string> Names; + for (int i = Begin; i <= End; i++) + Names.push_back(std::to_string(i)); + return generateSymbols(Names, WeakSymbols); +} + +std::string getQualifiedName(const Symbol &Sym) { + return (Sym.Scope + Sym.Name).str(); +} + +std::vector<std::string> match(const SymbolIndex &I, + const FuzzyFindRequest &Req, bool *Incomplete) { + std::vector<std::string> Matches; + bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) { + Matches.push_back(clang::clangd::getQualifiedName(Sym)); + }); + if (Incomplete) + *Incomplete = IsIncomplete; + return Matches; +} + +// Returns qualified names of symbols with any of IDs in the index. +std::vector<std::string> lookup(const SymbolIndex &I, + llvm::ArrayRef<SymbolID> IDs) { + LookupRequest Req; + Req.IDs.insert(IDs.begin(), IDs.end()); + std::vector<std::string> Results; + I.lookup(Req, [&](const Symbol &Sym) { + Results.push_back(getQualifiedName(Sym)); + }); + return Results; +} + +} // namespace clangd +} // namespace clang Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -7,6 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "IndexHelpers.h" +#include "index/Index.h" +#include "index/Merge.h" +#include "index/dex/DexIndex.h" #include "index/dex/Iterator.h" #include "index/dex/Token.h" #include "index/dex/Trigram.h" @@ -17,11 +21,13 @@ #include <string> #include <vector> +using ::testing::ElementsAre; +using ::testing::UnorderedElementsAre; + namespace clang { namespace clangd { namespace dex { - -using ::testing::ElementsAre; +namespace { TEST(DexIndexIterators, DocumentIterator) { const PostingList L = {4, 7, 8, 20, 42, 100}; @@ -312,6 +318,69 @@ "hij", "ijk", "jkl", "klm"})); } +TEST(DexIndex, Lookup) { + DexIndex I; + I.build(generateSymbols({"ns::abc", "ns::xyz"})); + EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); + EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), + UnorderedElementsAre("ns::abc", "ns::xyz")); + EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}), + UnorderedElementsAre("ns::xyz")); + EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre()); +} + +// FIXME(kbobyrev): Use 3+ symbols long names so that trigram index is used. +TEST(MergeDexIndex, Lookup) { + DexIndex I, J; + I.build(generateSymbols({"ns::A", "ns::B"})); + J.build(generateSymbols({"ns::B", "ns::C"})); + EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")), + UnorderedElementsAre("ns::A")); + EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")), + UnorderedElementsAre("ns::B")); + EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")), + UnorderedElementsAre("ns::C")); + EXPECT_THAT( + lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}), + UnorderedElementsAre("ns::A", "ns::B")); + EXPECT_THAT( + lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}), + UnorderedElementsAre("ns::A", "ns::C")); + EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")), + UnorderedElementsAre()); + EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre()); +} + +// FIXME(kbobyrev): Add more tests on DexIndex? Right now, it's mostly a wrapper +// around MemIndex, adopting tests from IndexTests.cpp sounds reasonable. +// However, most of these tests use short names (<3 symbols) for symbol lookups, +// which currently are dispatched to MemIndex and hence just duplicating these +// tests while substituting MemIndex with DexIndex is not a viable solution. +TEST(DexIndex, FuzzyFind) { + DexIndex Index; + Index.build(generateSymbols( + {"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC"})); + FuzzyFindRequest Req; + Req.Query = "ABC"; + Req.Scopes = {"ns::"}; + EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC")); + Req.Scopes = {"ns::", "ns::nested::"}; + EXPECT_THAT(match(Index, Req), + UnorderedElementsAre("ns::ABC", "ns::nested::ABC")); +} + +TEST(DexIndexTest, FuzzyMatch) { + DexIndex I; + I.build( + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + FuzzyFindRequest Req; + Req.Query = "lol"; + Req.MaxCandidateCount = 2; + EXPECT_THAT(match(I, Req), + UnorderedElementsAre("LaughingOutLoud", "LittleOldLady")); +} + +} // namespace } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/unittests/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/unittests/clangd/CMakeLists.txt +++ clang-tools-extra/unittests/clangd/CMakeLists.txt @@ -23,6 +23,7 @@ FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp HeadersTests.cpp + IndexHelpers.cpp IndexTests.cpp QualityTests.cpp SourceCodeTests.cpp Index: clang-tools-extra/clangd/index/dex/Token.h =================================================================== --- clang-tools-extra/clangd/index/dex/Token.h +++ clang-tools-extra/clangd/index/dex/Token.h @@ -22,9 +22,9 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H +#include "../Index.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/raw_ostream.h" - #include <string> #include <vector> @@ -81,6 +81,8 @@ } }; +std::vector<Token> generateSearchTokens(const Symbol &Sym); + } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/dex/Token.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Token.cpp @@ -0,0 +1,25 @@ +//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "Trigram.h" + +namespace clang { +namespace clangd { +namespace dex { + +std::vector<Token> generateSearchTokens(const Symbol &Sym) { + std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name); + Result.push_back(Token(Token::Kind::Scope, Sym.Scope)); + return Result; +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/index/dex/DexIndex.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/DexIndex.h @@ -0,0 +1,53 @@ +//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H + +#include "../Index.h" +#include "../MemIndex.h" +#include "Iterator.h" +#include "Token.h" +#include "Trigram.h" +#include <mutex> + +namespace clang { +namespace clangd { +namespace dex { + +/// In-memory Dex index implementation. +class DexIndex : public SymbolIndex { +public: + void build(std::shared_ptr<std::vector<const Symbol *>> Symbols); + + // FIXME(kbobyrev): This is the same as in MemIndex. Should I abstract this + // out? + static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab); + + bool + fuzzyFind(const FuzzyFindRequest &Req, + llvm::function_ref<void(const Symbol &)> Callback) const override; + + // FIXME(kbobyrev): Symbol lookup is also exactly the same as in MemIndex. + virtual void + lookup(const LookupRequest &Req, + llvm::function_ref<void(const Symbol &)> Callback) const override; + +private: + std::shared_ptr<std::vector<const Symbol *>> Symbols; + llvm::DenseMap<SymbolID, const Symbol *> Index; + mutable std::mutex Mutex; + llvm::DenseMap<Token, PostingList> InvertedIndex; +}; + +} // namespace dex +} // namespace clangd +} // namespace clang + +#endif Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/DexIndex.cpp @@ -0,0 +1,133 @@ +//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DexIndex.h" +#include "../../FuzzyMatch.h" +#include <algorithm> + +namespace clang { +namespace clangd { +namespace dex { + +void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) { + llvm::DenseMap<SymbolID, const Symbol *> TempIndex; + for (const Symbol *Sym : *Syms) + TempIndex[Sym->ID] = Sym; + + { + std::lock_guard<std::mutex> Lock(Mutex); + Index = std::move(TempIndex); + Symbols = std::move(Syms); + std::sort(begin(*Symbols), end(*Symbols), + [](const Symbol *LHS, const Symbol *RHS) { + return quality(*LHS) > quality(*RHS); + }); + InvertedIndex.clear(); + for (DocID SymbolRank = 0; SymbolRank < Symbols->size(); ++SymbolRank) { + const auto Sym = (*Symbols)[SymbolRank]; + for (const auto &Token : generateSearchTokens(*Sym)) { + if (!InvertedIndex.count(Token)) + InvertedIndex[Token] = {SymbolRank}; + else + InvertedIndex[Token].push_back(SymbolRank); + } + } + } +} + +bool DexIndex::fuzzyFind( + const FuzzyFindRequest &Req, + llvm::function_ref<void(const Symbol &)> Callback) const { + assert(!StringRef(Req.Query).contains("::") && + "There must be no :: in query."); + // FIXME(kbobyrev): Currently, only "long" (3+ symbols) requests are + // processed. After tokens are generated for the short queries and index + // symbols, this shouldn't be the case. + assert(Req.Query.size() >= 3 && + "DexIndex is currently not capable of processing queries of size <3."); + + FuzzyMatcher Filter(Req.Query); + bool More; + { + std::lock_guard<std::mutex> Lock(Mutex); + const auto TrigramTokens = generateIdentifierTrigrams(Req.Query); + std::vector<std::unique_ptr<Iterator>> TrigramIterators; + for (const auto &Trigram : TrigramTokens) { + const auto It = InvertedIndex.find(Trigram); + if (It != InvertedIndex.end()) + TrigramIterators.push_back(create(It->second)); + } + + std::vector<std::unique_ptr<Iterator>> TopLevelChildren; + TopLevelChildren.push_back(createAnd(move(TrigramIterators))); + + std::vector<std::unique_ptr<Iterator>> ScopeIterators; + for (const auto &Scope : Req.Scopes) { + const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope)); + if (It != InvertedIndex.end()) + ScopeIterators.push_back(create(It->second)); + } + if (ScopeIterators.size()) + TopLevelChildren.push_back(createOr(move(ScopeIterators))); + + auto QueryIterator = createAnd(move(TopLevelChildren)); + // FIXME(kbobyrev): Limit the total number of consumed symbols using + // Req.MaxCandidateCount. That would require implementing Limit iterator. + auto SymbolDocIDs = consume(*QueryIterator); + + More = SymbolDocIDs.size() > Req.MaxCandidateCount; + std::vector<std::pair<float, const Symbol *>> Scores(SymbolDocIDs.size()); + for (size_t SymbolIdx = 0; SymbolIdx < SymbolDocIDs.size(); ++SymbolIdx) { + const auto Sym = (*Symbols)[SymbolDocIDs[SymbolIdx]]; + const auto Score = Filter.match(Sym->Name); + assert(Score.hasValue() && "Matched symbol has all Fuzzy Matching " + "trigrams, it should match the query"); + Scores[SymbolIdx] = std::make_pair(-*Score * quality(*Sym), Sym); + } + std::sort(begin(Scores), end(Scores)); + // FIXME(kbobyrev): Scoring all matched symbols and then shrinking it is + // rather expensive, this should be replaced by two-stage filtering as + // proposed in Dex design document. + if (More) + Scores.resize(Req.MaxCandidateCount); + + for (const auto &ScoreSymbol : Scores) + Callback(*ScoreSymbol.second); + } + return More; +} + +void DexIndex::lookup(const LookupRequest &Req, + llvm::function_ref<void(const Symbol &)> Callback) const { + for (const auto &ID : Req.IDs) { + auto I = Index.find(ID); + if (I != Index.end()) + Callback(*I->second); + } +} + +std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) { + struct Snapshot { + SymbolSlab Slab; + std::vector<const Symbol *> Pointers; + }; + auto Snap = std::make_shared<Snapshot>(); + Snap->Slab = std::move(Slab); + for (auto &Sym : Snap->Slab) + Snap->Pointers.push_back(&Sym); + auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap), + &Snap->Pointers); + auto DexIdx = llvm::make_unique<DexIndex>(); + DexIdx->build(std::move(S)); + return std::move(DexIdx); +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/index/MemIndex.h =================================================================== --- clang-tools-extra/clangd/index/MemIndex.h +++ clang-tools-extra/clangd/index/MemIndex.h @@ -22,7 +22,7 @@ public: /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain /// accessible as long as `Symbols` is kept alive. - void build(std::shared_ptr<std::vector<const Symbol *>> Symbols); + virtual void build(std::shared_ptr<std::vector<const Symbol *>> Symbols); /// \brief Build index from a symbol slab. static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab); @@ -38,7 +38,6 @@ private: std::shared_ptr<std::vector<const Symbol *>> Symbols; // Index is a set of symbols that are deduplicated by symbol IDs. - // FIXME: build smarter index structure. llvm::DenseMap<SymbolID, const Symbol *> Index; mutable std::mutex Mutex; }; Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -43,8 +43,10 @@ index/SymbolCollector.cpp index/SymbolYAML.cpp + index/dex/DexIndex.cpp index/dex/Iterator.cpp index/dex/Trigram.cpp + index/dex/Token.cpp LINK_LIBS clangAST
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits