[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126919. ioeric marked 3 inline comments as done. ioeric added a comment. - Address more comments. # I am going to land it now. - Merge remote-tracking branch 'origin/master' into index Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/index/Index.h clangd/index/MemIndex.cpp clangd/index/MemIndex.h unittests/clangd/CMakeLists.txt unittests/clangd/IndexTests.cpp Index: unittests/clangd/IndexTests.cpp === --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,114 @@ +//===-- IndexTests.cpp ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct SlabAndPointers { + SymbolSlab Slab; + std::vector Pointers; +}; + +// Create a slab of symbols with IDs and names [Begin, End]. The life time of +// the slab is managed by the returned shared pointer. If \p WeakSymbols is +// provided, it will be pointed to the managed object in the returned shared +// pointer. +std::shared_ptr> +generateNumSymbols(int Begin, int End, + std::weak_ptr *WeakSymbols = nullptr) { + auto Slab = std::make_shared(); + if (WeakSymbols) +*WeakSymbols = Slab; + + for (int i = Begin; i <= End; i++) +Slab->Slab.insert(symbol(std::to_string(i))); + + for (const auto &Sym : Slab->Slab) +Slab->Pointers.push_back(&Sym.second); + + return {std::move(Slab), &Slab->Pointers}; +} + +std::vector match(const SymbolIndex &I, + const FuzzyFindRequest &Req) { + std::vector Matches; + auto Ctx = Context::empty(); + I.fuzzyFind(Ctx, Req, + [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); + return Matches; +} + +TEST(MemIndexTest, MemIndexSymbolsRecycled) { + MemIndex I; + std::weak_ptr Symbols; + I.build(generateNumSymbols(0, 10, &Symbols)); + FuzzyFindRequest Req; + Req.Query = "7"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("7")); + + EXPECT_FALSE(Symbols.expired()); + // Release old symbols. + I.build(generateNumSymbols(0, 0)); + EXPECT_TRUE(Symbols.expired()); +} + +TEST(MemIndexTest, MemIndexMatchSubstring) { + MemIndex I; + I.build(generateNumSymbols(5, 25)); + FuzzyFindRequest Req; + Req.Query = "5"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25")); +} + +TEST(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + MemIndex I; + I.build(std::move(Symbols)); + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST(MemIndexTest, MemIndexLimitedNumMatches) { + MemIndex I; + I.build(generateNumSymbols(0, 100)); + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: clangd/index/MemIndex.h === --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index for a (relatively small) set of symbols that +/// can be easily managed in memory. +class MemIndex : public SymbolIndex { +public: + /// \brief (Re-)Build index for `Symbols`
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
sammccall accepted this revision. sammccall added inline comments. Comment at: unittests/clangd/IndexTests.cpp:29 + +struct SlabAndPointers { + SymbolSlab Slab; this could be local to generateNumSymbols, up to you Comment at: unittests/clangd/IndexTests.cpp:51 + + auto Symbols = std::shared_ptr>(std::move(Slab), + &Slab->Pointers); nit: just return? (you can skip spelling out the shared_ptr type with {}, I think) Comment at: unittests/clangd/IndexTests.cpp:73 + EXPECT_THAT(match(I, Req), UnorderedElementsAre("7")); + + // Release old symbols. assert symbols is not expired here? Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126917. ioeric marked an inline comment as done. ioeric added a comment. - Fix comment for fuzzyFind Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/index/Index.h clangd/index/MemIndex.cpp clangd/index/MemIndex.h unittests/clangd/CMakeLists.txt unittests/clangd/IndexTests.cpp Index: unittests/clangd/IndexTests.cpp === --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,116 @@ +//===-- IndexTests.cpp ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct SlabAndPointers { + SymbolSlab Slab; + std::vector Pointers; +}; + +// Create a slab of symbols with IDs and names [Begin, End]. The life time of +// the slab is managed by the returned shared pointer. If \p WeakSymbols is +// provided, it will be pointed to the managed object in the returned shared +// pointer. +std::shared_ptr> +generateNumSymbols(int Begin, int End, + std::weak_ptr *WeakSymbols = nullptr) { + auto Slab = std::make_shared(); + if (WeakSymbols) +*WeakSymbols = Slab; + + for (int i = Begin; i <= End; i++) +Slab->Slab.insert(symbol(std::to_string(i))); + + for (const auto &Sym : Slab->Slab) +Slab->Pointers.push_back(&Sym.second); + + auto Symbols = std::shared_ptr>(std::move(Slab), + &Slab->Pointers); + + return Symbols; +} + +std::vector match(const SymbolIndex &I, + const FuzzyFindRequest &Req) { + std::vector Matches; + auto Ctx = Context::empty(); + I.fuzzyFind(Ctx, Req, + [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); + return Matches; +} + +TEST(MemIndexTest, MemIndexSymbolsRecycled) { + MemIndex I; + std::weak_ptr Symbols; + I.build(generateNumSymbols(0, 10, &Symbols)); + FuzzyFindRequest Req; + Req.Query = "7"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("7")); + + // Release old symbols. + I.build(generateNumSymbols(0, 0)); + EXPECT_TRUE(Symbols.expired()); +} + +TEST(MemIndexTest, MemIndexMatchSubstring) { + MemIndex I; + I.build(generateNumSymbols(5, 25)); + FuzzyFindRequest Req; + Req.Query = "5"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25")); +} + +TEST(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + MemIndex I; + I.build(std::move(Symbols)); + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST(MemIndexTest, MemIndexLimitedNumMatches) { + MemIndex I; + I.build(generateNumSymbols(0, 100)); + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: clangd/index/MemIndex.h === --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index for a (relatively small) set of symbols that +/// can be easily managed in memory. +class MemIndex : public SymbolIndex { +public: + /// \brief (Re-)Build index for `Symbols`. All symbol
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126916. ioeric marked 7 inline comments as done. ioeric added a comment. - Address review comments Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/index/Index.h clangd/index/MemIndex.cpp clangd/index/MemIndex.h unittests/clangd/CMakeLists.txt unittests/clangd/IndexTests.cpp Index: unittests/clangd/IndexTests.cpp === --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,116 @@ +//===-- IndexTests.cpp ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct SlabAndPointers { + SymbolSlab Slab; + std::vector Pointers; +}; + +// Create a slab of symbols with IDs and names [Begin, End]. The life time of +// the slab is managed by the returned shared pointer. If \p WeakSymbols is +// provided, it will be pointed to the managed object in the returned shared +// pointer. +std::shared_ptr> +generateNumSymbols(int Begin, int End, + std::weak_ptr *WeakSymbols = nullptr) { + auto Slab = std::make_shared(); + if (WeakSymbols) +*WeakSymbols = Slab; + + for (int i = Begin; i <= End; i++) +Slab->Slab.insert(symbol(std::to_string(i))); + + for (const auto &Sym : Slab->Slab) +Slab->Pointers.push_back(&Sym.second); + + auto Symbols = std::shared_ptr>(std::move(Slab), + &Slab->Pointers); + + return Symbols; +} + +std::vector match(const SymbolIndex &I, + const FuzzyFindRequest &Req) { + std::vector Matches; + auto Ctx = Context::empty(); + I.fuzzyFind(Ctx, Req, + [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); + return Matches; +} + +TEST(MemIndexTest, MemIndexSymbolsRecycled) { + MemIndex I; + std::weak_ptr Symbols; + I.build(generateNumSymbols(0, 10, &Symbols)); + FuzzyFindRequest Req; + Req.Query = "7"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("7")); + + // Release old symbols. + I.build(generateNumSymbols(0, 0)); + EXPECT_TRUE(Symbols.expired()); +} + +TEST(MemIndexTest, MemIndexMatchSubstring) { + MemIndex I; + I.build(generateNumSymbols(5, 25)); + FuzzyFindRequest Req; + Req.Query = "5"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25")); +} + +TEST(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + MemIndex I; + I.build(std::move(Symbols)); + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST(MemIndexTest, MemIndexLimitedNumMatches) { + MemIndex I; + I.build(generateNumSymbols(0, 100)); + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: clangd/index/MemIndex.h === --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index for a (relatively small) set of symbols that +/// can be easily managed in memory. +class MemIndex : public SymbolIndex { +public: + /// \brief (Re-)Build index for `Symbols`. All symbol p
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
sammccall accepted this revision. sammccall added a comment. This revision is now accepted and ready to land. Thanks for the split & refactoring! This looks nice. Just nits, plus some thoughts on the test. Comment at: clangd/index/Index.cpp:11 #include "Index.h" - +#include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/Support/SHA1.h" changes in this file can now be reverted I think Comment at: clangd/index/Index.h:128 + /// \brief Matches symbols in the index fuzzily and applies \p Callback on + /// each matched symbol. + /// nit: "... before returning". Just to be explicit that this is a synchronous API. Comment at: clangd/index/Index.h:130 + /// + /// Returns true if all candidates are matched. + virtual bool This is a little vague - Returns true if the result list is complete, false if it was truncated due to MaxCandidateCount? Might be clearer to invert it - Returns true if the result list was truncated - up to you. Comment at: clangd/index/Index.h:132 + virtual bool + fuzzyFind(const FuzzyFindRequest &Req, +std::function Callback) const = 0; context patch has landed, so this should now take const Context& as first parameter Comment at: clangd/index/Index.h:136 + // FIXME: add interfaces for more index use cases: + // - Symbol getSymbolInfo(llvm::StringRef USR); + // - getAllOccurrences(llvm::StringRef USR); USRs will rather be SymbolIDs Comment at: clangd/index/MemIndex.cpp:38 + // Find all symbols that contain the query, igoring cases. + // FIXME: use better matching algorithm, e.g. fuzzy matcher. + if (StringRef(StringRef(Sym->QualifiedName).lower()) you can easily do this now - it shouldn't even be more lines of code. Up to you of course. Comment at: unittests/clangd/IndexTests.cpp:29 + +struct CountedSymbolSlab { + CountedSymbolSlab() = delete; nit: given the name, I actually think inheriting from Slab might be clearer. Though I'm not sure we actually need this, see `weak_ptr` part of next comment. Comment at: unittests/clangd/IndexTests.cpp:41 + +class MemIndexTest : public ::testing::Test { +protected: This is a nice test, the one weakness I see is it's hard to read the cases in isolation as there's quite a lot of action-at-a-distance. I think we can eliminate the base class without adding much boilerplate, which would make the tests more self-contained: - `Index` can be owned by the test - `match()` is good, but moving `Index.build()` call into the test, and accepting `Index` as a param means this interaction is more obvious and `match` can be a free function. - `CountedSymbolSlab` can more directly be replaced by a `weak_ptr`, which can tell you whether a `shared_ptr` is alive - `generateNumSymbols` (already) doesn't need to be a member. Having it optionally emit a weak_ptr to its return value would simplify the tests a little. So MemIndexSymbolsRecycled would look something like Index I; std::weak_ptr> Symbols; I.build(generateNumSymbols(0, 10), &Symbols); EXPECT_THAT(match(I, Req), ElementsAre("7")); EXPECT_FALSE(Symbols.expired()); I.build(generateNumSymbols(0,0)); EXPECT_TRUE(Symbols.expired()); WDYT? Comment at: unittests/clangd/IndexTests.cpp:62 + Slab->Slab.insert(symbol(std::to_string(i))); +auto *Symbols = new std::vector(); + This is a leak - the returned shared_ptr doesn't own this vector. (I'm actually fine with this for simplicity if it's documented - but we might be running leak checkers that complain about it?) In principle you need to make_shared a struct containing both the vector and the slab. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126775. ioeric added a comment. - Remove everything except for SymbolIndex interface and MemIndex - Added unit tests for MemIndex Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/index/Index.cpp clangd/index/Index.h clangd/index/MemIndex.cpp clangd/index/MemIndex.h unittests/clangd/CMakeLists.txt unittests/clangd/IndexTests.cpp Index: unittests/clangd/IndexTests.cpp === --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,118 @@ +//===-- IndexTests.cpp ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct CountedSymbolSlab { + CountedSymbolSlab() = delete; + + explicit CountedSymbolSlab(int &Counter) : Counter(Counter) { ++Counter; } + + ~CountedSymbolSlab() { Counter--; } + + SymbolSlab Slab; + // A counter for the number of living slabs. + int &Counter; +}; + +class MemIndexTest : public ::testing::Test { +protected: + std::vector + match(std::shared_ptr> Symbols, +const FuzzyFindRequest &Req) { +Index.build(std::move(Symbols)); +std::vector Matches; +Index.fuzzyFind( +Req, [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); +return Matches; + } + + // Build a CountedSymbolSlab containing symbols with IDs and names [Begin, + // End]. The life time of the slab is managed by the returned shared_ptr, + // which points to a vector of pointers pointing to all symbols in the slab. + std::shared_ptr> + generateNumSymbols(int Begin, int End) { +auto Slab = std::make_shared(SlabCounter); + +for (int i = Begin; i <= End; i++) + Slab->Slab.insert(symbol(std::to_string(i))); +auto *Symbols = new std::vector(); + +for (const auto &Sym : Slab->Slab) + Symbols->push_back(&Sym.second); +return std::shared_ptr>(std::move(Slab), +Symbols); + } + + int SlabCounter = 0; + MemIndex Index; +}; + +TEST_F(MemIndexTest, MemIndexSymbolsRecycled) { + FuzzyFindRequest Req; + Req.Query = "7"; + auto Matches = match(generateNumSymbols(0, 10), Req); + EXPECT_THAT(Matches, UnorderedElementsAre("7")); + + EXPECT_EQ(SlabCounter, 1); + // Release old symbols. + Index.build(std::make_shared>()); + EXPECT_EQ(SlabCounter, 0); +} + +TEST_F(MemIndexTest, MemIndexMatchSubstring) { + FuzzyFindRequest Req; + Req.Query = "5"; + auto Matches = match(generateNumSymbols(5, 25), Req); + EXPECT_THAT(Matches, UnorderedElementsAre("5", "15", "25")); +} + +TEST_F(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + auto Matches = match(std::move(Symbols), Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST_F(MemIndexTest, MemIndexLimitedNumMatches) { + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(generateNumSymbols(0, 100), Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: clangd/index/MemIndex.h === --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index f
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
sammccall added a comment. I think this patch is too big, and there are several separate pieces here: - the index interface, and the implementation of the dynamic symbol index (these probably belong together). We're missing unit tests for the implementation. - building and updating the dynamic symbol index in clangd - code completion based on a symbol index As a consequence I'm finding it pretty intimidating to review, I've tried to leave the main high-level comments. Comment at: clangd/ClangdIndex.h:24 + +/// \brief This manages symbols from source files that can be updated or +/// removed. Symbols are used by a symbol index which builds indexes on symbols nit: maybe call this out as a container? that tells a lot about what it does/doesn't do. e.g. `\brief A container of Symbols from several source files. It can be updated at source-file granularity, replacing all symbols from one file with a new set.` ... Comment at: clangd/ClangdUnit.cpp:854 NewAST->getDiagnostics().end()); + if (That->FileSyms) { +auto Symbols = Having clangdunit know how the index is maintained feels like a layering violation. Can we instead have clangdunit take a callback (`UniqueFunction` or so) that gets invoked after each successful build? That can be the "hook" where we do indexing. The common higher layer (ClangdServer) then just has to have the glue code tying the ClangdUnit API to the Index functionality. Comment at: clangd/ClangdUnitStore.h:29 public: + explicit CppFileCollection(FileSymbols *FileSyms) : FileSyms(FileSyms) {} + Similarly here, this can take a post-build callback of void(Filename, ParsedAST&) that is translated into per-cppfile callbacks. Then it doesn't need to know about FileSymbols. (Or ParsedAST*, with nullptr meaning this file is gone) Comment at: clangd/CodeComplete.h:50 + /// Add symbols in namespace context (including global namespace) to code + /// completion. I find the config changes pretty confusing. CodeComplete options is for options we want to offer users. This change doesn't seem motivated by a user requirement, but rather by implementation details. If we're going to do that, we should be clear about it, rather than just expose every option of clang::CodeCompletionOptions we might want to set ourselves If we do want to ensure that we can do clangd::CodeCompleteOptions -> clang::CodeCompleteOptions without extra args: CodeCompleteOptions { ... const Index* Index = nullptr; // Populated internally by clangd, do not set } then we can use the presence of index to determine whether to restrict what we ask Sema for. (We could also let users override the index used for code completion this way if we want, but I don't see a use case) Comment at: clangd/index/FileIndex.h:62 +/// whose symbols can be easily managed in memory. +class FileSymbolsMemIndex : public SymbolIndex { +public: I think it'd be possible and nice to decouple `MemIndex` from FileSymbols, such that we can use it for the on-disk index (which is one big slab). At the moment it seems like an unhappy mix - we don't actually get any incrementality, but we're coupled to the incremental storage. I think it's worth addressing now as MemIndex should be named differently and move to another file if this is the case. One way this could work: class MemIndex { shared_ptr> Symbols; // Symbols must remain accessible as long as S is kept alive. build(shared_ptr> S) { // TODO: build smarter index structures later lock_guard Symbols = move(S); // releases old symbols // TODO: move smart index structures } } class FileSymbols { // The shared_ptr keeps the symbols alive shared_ptr> allSymbols() { struct Snapshot { vector Pointers; vector> KeepAlive; }; auto Snap = make_shared(); for (Slab in FileToSymbols) { Snap->KeepAlive.push_back(Slab); for (Sym in *Slab) Snap.Pointers.push_back(&Sym); } return shared_ptr>(move(Snap), &Snap->Pointers); } } // In ClangdServer, when getting new symbols for a file FileSymbols.update(Path, Collector.takeSymbols()); DynamicIndex.build(FileSymbols.allSymbols()); This seems pretty nice, but vector> would work in practice for the interface too, if you don't like aliasing shared_ptr tricks. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126686. ioeric added a comment. Merged with origin/master and moved index-related files to index/ subdirectory. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/ClangdLSPServer.cpp clangd/ClangdLSPServer.h clangd/ClangdServer.cpp clangd/ClangdServer.h clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/ClangdUnitStore.cpp clangd/ClangdUnitStore.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/index/FileIndex.cpp clangd/index/FileIndex.h clangd/index/Index.cpp clangd/index/Index.h clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/tool/ClangdMain.cpp Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -90,6 +90,15 @@ "Trace internal events and timestamps in chrome://tracing JSON format"), llvm::cl::init(""), llvm::cl::Hidden); +static llvm::cl::opt EnableIndexBasedCompletion( +"enable-index-based-completion", +llvm::cl::desc( +"Enable index-based global code completion (experimental). Clangd will " +"use symbols built from ASTs of opened files and additional indexes " +"(e.g. offline built codebase-wide symbol table) to complete partial " +"symbols."), +llvm::cl::init(false)); + int main(int argc, char *argv[]) { llvm::cl::ParseCommandLineOptions(argc, argv, "clangd"); @@ -170,9 +179,15 @@ clangd::CodeCompleteOptions CCOpts; CCOpts.EnableSnippets = EnableSnippets; CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; + if (EnableIndexBasedCompletion) { +// Disable sema code completion for qualified code completion and use global +// symbol index instead. +CCOpts.IncludeNamespaceLevelDecls = false; + } // Initialize and run ClangdLSPServer. ClangdLSPServer LSPServer(Out, WorkerThreadsCount, StorePreamblesInMemory, -CCOpts, ResourceDirRef, CompileCommandsDirPath); +CCOpts, ResourceDirRef, CompileCommandsDirPath, +EnableIndexBasedCompletion); constexpr int NoShutdownRequestErrorCode = 1; llvm::set_thread_name("clangd.main"); return LSPServer.run(std::cin) ? 0 : NoShutdownRequestErrorCode; Index: clangd/index/SymbolCollector.h === --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -24,6 +24,12 @@ public: SymbolCollector() = default; + void initialize(ASTContext &Ctx) override { ASTCtx = &Ctx; } + + void setPreprocessor(std::shared_ptr PP) override { +this->PP = std::move(PP); + } + bool handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles, ArrayRef Relations, FileID FID, @@ -37,6 +43,9 @@ private: // All Symbols collected from the AST. SymbolSlab Symbols; + + ASTContext *ASTCtx; + std::shared_ptr PP; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp === --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -8,8 +8,8 @@ //===--===// #include "SymbolCollector.h" - #include "clang/AST/ASTContext.h" +#include "clang/Sema/CodeCompleteConsumer.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/Basic/SourceManager.h" @@ -57,6 +57,107 @@ } return AbsolutePath.str(); } + +std::string getDocumentation(const CodeCompletionString &CCS) { + // Things like __attribute__((nonnull(1,3))) and [[noreturn]]. Present this + // information in the documentation field. + std::string Result; + const unsigned AnnotationCount = CCS.getAnnotationCount(); + if (AnnotationCount > 0) { +Result += "Annotation"; +if (AnnotationCount == 1) { + Result += ": "; +} else /* AnnotationCount > 1 */ { + Result += "s: "; +} +for (unsigned I = 0; I < AnnotationCount; ++I) { + Result += CCS.getAnnotation(I); + Result.push_back(I == AnnotationCount - 1 ? '\n' : ' '); +} + } + // Add brief documentation (if there is any). + if (CCS.getBriefComment() != nullptr) { +if (!Result.empty()) { + // This means we previously added annotations. Add an extra newline + // character to make the annotations stand out. + Result.push_back('\n'); +} +Result += CCS.getBriefComment(); + } + return Result; +} + +void ProcessChunks(const CodeCompletionString &CCS, Symbol *Sym) { + for (const auto &Chunk : CCS) { +// Informative qualifier chunks only clutter completion results, skip +// them. +if (Chunk.Kind == CodeCompletionString::CK_Informative && +StringRef(Chunk.Text).endswith("::")) + continue; + +switch (Chunk.Kind) { +case CodeCompletionString
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric added inline comments. Comment at: clangd/ClangdIndex.h:1 +//===--- ClangdIndex.h - Symbol indexes for clangd.---*- C++-*-===// +// sammccall wrote: > nit: `Clangd` prefix doesn't do much. > I'd suggest `Index.h` for the interface (including Symbol), and `MemIndex.h` > for the in-memory implementations? I'll do the renaming when D40897 is landed since it defines Symbol and would probably create `Index.h` as well. Comment at: clangd/ClangdIndex.h:50 +// relatively small symbol table built offline. +class InMemoryIndexSourcer { +public: sammccall wrote: > As discussed offline - the lifetime of the contained symbols and the > operations on the index derived from it need to be carefully considered. > > Involving inheritance here seems likely to make these tricky interactions > even trickier. > > Would suggest: > - a concrete class (`SymbolCache`? `FileSymbols`?) to hold a collection of > symbols from multiple files, with the ability to iterate over them and > replace all the symbols from one file at once > - a concrete class `MemIndex` that can be constructed from a sequence of > symbols and implements `Index` > > You probably want to make them both immutable with snapshot semantics, or > have a reader-writer lock that spans both. The current revision implements a `FileSymbols` with the snapshot semantics. Not entirely sure if this is what you are looking for. Let me know... Comment at: clangd/ClangdIndex.h:72 + /// collected. + void update(PathRef Path, ASTContext &Context, + std::shared_ptr PP, sammccall wrote: > The AST -> symbol conversion doesn't seem to have much to do with the rest of > the class - we can move this to a free function I think, which would give the > class a narrower responsibility. Moved the conversion code to ClangdUnit.cpp Comment at: clangd/CodeComplete.cpp:378 +// FIXME: output a warning to logger if there are results from sema. +return qualifiedIdCompletionWithIndex(*Index, S, **OptSS, &Items); + } sammccall wrote: > I don't like the idea of doing index lookups from the middle of a sema > callback! > > Can we just record the CCContext in the Collector instead, and do this work > afterwards? Good point! Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 126379. ioeric marked 8 inline comments as done. ioeric added a comment. - Address review comments. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/ClangdIndex.cpp clangd/ClangdIndex.h clangd/ClangdLSPServer.cpp clangd/ClangdLSPServer.h clangd/ClangdServer.cpp clangd/ClangdServer.h clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/ClangdUnitStore.cpp clangd/ClangdUnitStore.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/Symbol.cpp clangd/Symbol.h clangd/SymbolCompletionInfo.cpp clangd/SymbolCompletionInfo.h clangd/SymbolIndex.h clangd/tool/ClangdMain.cpp unittests/clangd/CMakeLists.txt unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- /dev/null +++ unittests/clangd/SymbolCollectorTests.cpp @@ -0,0 +1,110 @@ +//===-- SymbolCollectorTests.cpp ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "Symbol.h" +#include "clang/Index/IndexingAction.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemOptions.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +#include +#include + +using testing::ElementsAre; +using testing::Eq; +using testing::Field; + +namespace clang { +namespace clangd { + +namespace { +class SymbolIndexActionFactory : public tooling::FrontendActionFactory { + public: + SymbolIndexActionFactory() = default; + + clang::FrontendAction *create() override { +index::IndexingOptions IndexOpts; +IndexOpts.SystemSymbolFilter = +index::IndexingOptions::SystemSymbolFilterKind::All; +IndexOpts.IndexFunctionLocals = false; +Collector = std::make_shared(); +FrontendAction *Action = +index::createIndexingAction(Collector, IndexOpts, nullptr).release(); +return Action; + } + + std::shared_ptr Collector; +}; + +class SymbolCollectorTest : public ::testing::Test { +public: + bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) { +llvm::IntrusiveRefCntPtr InMemoryFileSystem( +new vfs::InMemoryFileSystem); +llvm::IntrusiveRefCntPtr Files( +new FileManager(FileSystemOptions(), InMemoryFileSystem)); + +const std::string FileName = "symbol.cc"; +const std::string HeaderName = "symbols.h"; +auto Factory = llvm::make_unique(); + +tooling::ToolInvocation Invocation( +{"symbol_collector", "-fsyntax-only", "-std=c++11", FileName}, +Factory->create(), Files.get(), +std::make_shared()); + +InMemoryFileSystem->addFile(HeaderName, 0, +llvm::MemoryBuffer::getMemBuffer(HeaderCode)); + +std::string Content = "#include\"" + std::string(HeaderName) + "\""; +Content += "\n" + MainCode.str(); +InMemoryFileSystem->addFile(FileName, 0, +llvm::MemoryBuffer::getMemBuffer(Content)); +Invocation.run(); +Symbols = Factory->Collector->getSymbols(); +return true; + } + +protected: + std::set Symbols; +}; + +TEST_F(SymbolCollectorTest, CollectSymbol) { + const std::string Header = R"( +class Foo { + void f(); +}; +void f1(); +inline void f2() {} + )"; + const std::string Main = R"( +namespace { +void ff() {} // ignore +} +void f1() {} + )"; + runSymbolCollector(Header, Main); + EXPECT_THAT(Symbols, + UnorderedElementsAre(Field(&Symbol::QualifiedName, Eq("Foo")), + Field(&Symbol::QualifiedName, Eq("Foo::f")), + Field(&Symbol::QualifiedName, Eq("f1")), + Field(&Symbol::QualifiedName, Eq("f2"; +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -15,6 +15,7 @@ JSONExprTests.cpp TestFS.cpp TraceTests.cpp + SymbolCollectorTests.cpp ) target_link_libraries(ClangdTests Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -90,6 +90,15 @@ "Trace internal events and timestamps in chrome://tracing JSON format"), llvm::cl::init("
Re: [PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
A Hangouts meeting sounds good! Yes, let's arrange via emails. From: Haojian Wu via Phabricator Sent: Friday, December 8, 2017 7:14:42 AM To: ioe...@google.com; Marc-André Laperle; sammcc...@google.com Cc: hok...@google.com; kli...@google.com; mgo...@gentoo.org; ibiryu...@google.com; cfe-commits@lists.llvm.org Subject: [PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion. hokein added a comment. Thanks for the feedback, Marc! Yeah, I think the ClangdIndexDataSymbol and ClangdIndexDataOccurrence are something similar to what https://reviews.llvm.org/D40897 trying to address, maybe put the discussion there? Before that, I think having a sym meeting is a good idea to keep us in the same page. In https://reviews.llvm.org/D40548#949279, @ioeric wrote: > > > > I think some of the ideas here could be useful. This patch focuses mostly on > index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design > of symbol structure. The way symbols are stored and used in this patch is > likely to change depending on how https://reviews.llvm.org/D40897 goes. > > > The "Clangd" prefix adds a bit much of clutter so maybe it should be > > removed. I think the main points are that having generic > > foreachSymbols/foreachOccurrence with callbacks is well suited to implement > > multiple features with minimal copying. > > Although I'm not sure if `foreachSymbols`/... would be feasible for all > indexes yet, we do plan to switch to callback-based index interfaces, which > Sam also proposed in the review comments. > > There have been some offline discussions happening around clangd's indexing, > and sorry that we have not been able to keep you up to date. I think it might > be more efficient if we could meet via VC/Hangouts and sync on our designs. > If you don't mind a meeting, I am happy to arrange it via emails. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
hokein added a comment. Thanks for the feedback, Marc! Yeah, I think the ClangdIndexDataSymbol and ClangdIndexDataOccurrence are something similar to what https://reviews.llvm.org/D40897 trying to address, maybe put the discussion there? Before that, I think having a sym meeting is a good idea to keep us in the same page. In https://reviews.llvm.org/D40548#949279, @ioeric wrote: > > > > I think some of the ideas here could be useful. This patch focuses mostly on > index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design > of symbol structure. The way symbols are stored and used in this patch is > likely to change depending on how https://reviews.llvm.org/D40897 goes. > > > The "Clangd" prefix adds a bit much of clutter so maybe it should be > > removed. I think the main points are that having generic > > foreachSymbols/foreachOccurrence with callbacks is well suited to implement > > multiple features with minimal copying. > > Although I'm not sure if `foreachSymbols`/... would be feasible for all > indexes yet, we do plan to switch to callback-based index interfaces, which > Sam also proposed in the review comments. > > There have been some offline discussions happening around clangd's indexing, > and sorry that we have not been able to keep you up to date. I think it might > be more efficient if we could meet via VC/Hangouts and sync on our designs. > If you don't mind a meeting, I am happy to arrange it via emails. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric added a comment. In https://reviews.llvm.org/D40548#949182, @malaperle wrote: > As a follow-up, here's the interface for querying the index that I am using > right now. It's meant to be able to retrieve from any kind of "backend", i.e. > in-memory, ClangdIndexDataStore, libIndexStore, etc. I was able to implement > "Open Workspace Symbol" (which is close to code completion in concept), Find > References and Find Definitions. > > using USR = llvm::SmallString<256>; > > class ClangdIndexDataOccurrence; > > class ClangdIndexDataSymbol { > public: > virtual index::SymbolKind getKind() = 0; > /// For example, for mynamespace::myclass::mymethod, this will be > /// mymethod. > virtual std::string getName() = 0; > /// For example, for mynamespace::myclass::mymethod, this will be > /// mynamespace::myclass:: > virtual std::string getQualifier() = 0; > virtual std::string getUsr() = 0; > > virtual void foreachOccurrence(index::SymbolRoleSet Roles, > llvm::function_ref Receiver) = 0; > > virtual ~ClangdIndexDataSymbol() = default; > }; > > class ClangdIndexDataOccurrence { > public: > enum class OccurrenceType : uint16_t { >OCCURRENCE, >DEFINITION_OCCURRENCE > }; > > virtual OccurrenceType getKind() const = 0; > virtual std::string getPath() = 0; > /// Get the start offset of the symbol occurrence. The SourceManager can > be > /// used for implementations that need to convert from a line/column > /// representation to an offset. > virtual uint32_t getStartOffset(SourceManager &SM) = 0; > /// Get the end offset of the symbol occurrence. The SourceManager can be > /// used for implementations that need to convert from a line/column > /// representation to an offset. > virtual uint32_t getEndOffset(SourceManager &SM) = 0; > virtual ~ClangdIndexDataOccurrence() = default; > //TODO: Add relations > > static bool classof(const ClangdIndexDataOccurrence *O) { return > O->getKind() == OccurrenceType::OCCURRENCE; } > }; > > /// An occurrence that also has definition with a body that requires > additional > /// locations to keep track of the beginning and end of the body. > class ClangdIndexDataDefinitionOccurrence : public > ClangdIndexDataOccurrence { > public: > virtual uint32_t getDefStartOffset(SourceManager &SM) = 0; > virtual uint32_t getDefEndOffset(SourceManager &SM) = 0; > > static bool classof(const ClangdIndexDataOccurrence *O) { return > O->getKind() == OccurrenceType::DEFINITION_OCCURRENCE; } > }; > > class ClangdIndexDataProvider { > public: > > virtual void foreachSymbols(StringRef Pattern, > llvm::function_ref Receiver) = 0; > virtual void foreachSymbols(const USR &Usr, > llvm::function_ref Receiver) = 0; > > virtual ~ClangdIndexDataProvider() = default; > }; > I think some of the ideas here could be useful. This patch focuses mostly on index interfaces and https://reviews.llvm.org/D40897 emphasizes on the design of symbol structure. The way symbols are stored and used in this patch is likely to change depending on how https://reviews.llvm.org/D40897 goes. > The "Clangd" prefix adds a bit much of clutter so maybe it should be removed. > I think the main points are that having generic > foreachSymbols/foreachOccurrence with callbacks is well suited to implement > multiple features with minimal copying. Although I'm not sure if `foreachSymbols`/... would be feasible for all indexes yet, we do plan to switch to callback-based index interfaces, which Sam also proposed in the review comments. There have been some offline discussions happening around clangd's indexing, and sorry that we have not been able to keep you up to date. I think it might be more efficient if we could meet via VC/Hangouts and sync on our designs. If you don't mind a meeting, I am happy to arrange it via emails. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
malaperle added a comment. As a follow-up, here's the interface for querying the index that I am using right now. It's meant to be able to retrieve from any kind of "backend", i.e. in-memory, ClangdIndexDataStore, libIndexStore, etc. I was able to implement "Open Workspace Symbol" (which is close to code completion in concept), Find References and Find Definitions. using USR = llvm::SmallString<256>; class ClangdIndexDataOccurrence; class ClangdIndexDataSymbol { public: virtual index::SymbolKind getKind() = 0; /// For example, for mynamespace::myclass::mymethod, this will be /// mymethod. virtual std::string getName() = 0; /// For example, for mynamespace::myclass::mymethod, this will be /// mynamespace::myclass:: virtual std::string getQualifier() = 0; virtual std::string getUsr() = 0; virtual void foreachOccurrence(index::SymbolRoleSet Roles, llvm::function_ref Receiver) = 0; virtual ~ClangdIndexDataSymbol() = default; }; class ClangdIndexDataOccurrence { public: enum class OccurrenceType : uint16_t { OCCURRENCE, DEFINITION_OCCURRENCE }; virtual OccurrenceType getKind() const = 0; virtual std::string getPath() = 0; /// Get the start offset of the symbol occurrence. The SourceManager can be /// used for implementations that need to convert from a line/column /// representation to an offset. virtual uint32_t getStartOffset(SourceManager &SM) = 0; /// Get the end offset of the symbol occurrence. The SourceManager can be /// used for implementations that need to convert from a line/column /// representation to an offset. virtual uint32_t getEndOffset(SourceManager &SM) = 0; virtual ~ClangdIndexDataOccurrence() = default; //TODO: Add relations static bool classof(const ClangdIndexDataOccurrence *O) { return O->getKind() == OccurrenceType::OCCURRENCE; } }; /// An occurrence that also has definition with a body that requires additional /// locations to keep track of the beginning and end of the body. class ClangdIndexDataDefinitionOccurrence : public ClangdIndexDataOccurrence { public: virtual uint32_t getDefStartOffset(SourceManager &SM) = 0; virtual uint32_t getDefEndOffset(SourceManager &SM) = 0; static bool classof(const ClangdIndexDataOccurrence *O) { return O->getKind() == OccurrenceType::DEFINITION_OCCURRENCE; } }; class ClangdIndexDataProvider { public: virtual void foreachSymbols(StringRef Pattern, llvm::function_ref Receiver) = 0; virtual void foreachSymbols(const USR &Usr, llvm::function_ref Receiver) = 0; virtual ~ClangdIndexDataProvider() = default; }; The "Clangd" prefix adds a bit much of clutter so maybe it should be removed. I think the main points are that having generic foreachSymbols/foreachOccurrence with callbacks is well suited to implement multiple features with minimal copying. Comment at: clangd/SymbolIndex.h:50 + virtual llvm::Expected + complete(const CompletionRequest &Req) const = 0; + sammccall wrote: > This is finding symbols that fuzzy-match a string, right? > We shouldn't conflate this with code completion - code completion is > context-sensitive, and this query operation will be used for other operations > like navigate-to-symbol. > > Suggest `fuzzyFind` or similar. > (Similarly with CompletionRequest, CompletionSymbol) Similar to completion is "Open Workspace Symbol". So a more generic query could be useful. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
sammccall added inline comments. Comment at: clangd/ClangdIndex.h:24 +/// (re-)scoring symbols from different indexes. +class CombinedSymbolIndex : public SymbolIndex { +public: This seems a little premature to me - it's hard to know if this idea makes sense without actually having multiple implementations to mix together. My gut feeling is that sorting by (index weight, symbol score) puts too much emphasis on the weight. Can we just have a single optional index for now? Comment at: clangd/ClangdIndex.h:36 + + void addSymbolIndex(llvm::StringRef Label, WeightedIndex Index); + If you do keep this class, move the label inside the struct? Avoids a lot of std::pair Comment at: clangd/ClangdIndex.h:50 +// relatively small symbol table built offline. +class InMemoryIndexSourcer { +public: As discussed offline - the lifetime of the contained symbols and the operations on the index derived from it need to be carefully considered. Involving inheritance here seems likely to make these tricky interactions even trickier. Would suggest: - a concrete class (`SymbolCache`? `FileSymbols`?) to hold a collection of symbols from multiple files, with the ability to iterate over them and replace all the symbols from one file at once - a concrete class `MemIndex` that can be constructed from a sequence of symbols and implements `Index` You probably want to make them both immutable with snapshot semantics, or have a reader-writer lock that spans both. Comment at: clangd/ClangdIndex.h:72 + /// collected. + void update(PathRef Path, ASTContext &Context, + std::shared_ptr PP, The AST -> symbol conversion doesn't seem to have much to do with the rest of the class - we can move this to a free function I think, which would give the class a narrower responsibility. Comment at: clangd/ClangdIndex.h:1 +//===--- ClangdIndex.h - Symbol indexes for clangd.---*- C++-*-===// +// nit: `Clangd` prefix doesn't do much. I'd suggest `Index.h` for the interface (including Symbol), and `MemIndex.h` for the in-memory implementations? Comment at: clangd/ClangdLSPServer.h:37 + llvm::Optional CompileCommandsDir, + std::vector< + std::pair> ilya-biryukov wrote: > Maybe pass a parameter of type `SymbolIndex&` instead of a vector, which is > used to create `CombinedSymbolIndex` later? > It seems that `ClangdServer` does not really care about the specific index > implementation that's used (nor should it!) > > We could have helper methods to conveniently create `CombinedSymbolIndex` > from a list of indexes, or even create the default index for clangd. I think the issue is that ClangdServer is going to create and maintain the dynamic index, which should then be merged as a peer with a set of other indexes. Can we just pass in a single SymbolIndex* StaticIdx, and hard-code the assumption about static index + dynamic index being what we're using? That way we can also hard-code the heuristics for merging them together, instead of data-driving everything. Comment at: clangd/ClangdLSPServer.h:38 + llvm::Optional CompileCommandsDir, + bool EnableIndexBasedCodeCompletion, + std::vector< this is kind of two options in one - "should we build the index" and "should we query the index". That seems OK, but I'd consider renaming this to BuildDynamicSymbolIndex or something to make it clear that building is optional, querying it isn't provided as an option Comment at: clangd/CodeComplete.cpp:378 +// FIXME: output a warning to logger if there are results from sema. +return qualifiedIdCompletionWithIndex(*Index, S, **OptSS, &Items); + } I don't like the idea of doing index lookups from the middle of a sema callback! Can we just record the CCContext in the Collector instead, and do this work afterwards? Comment at: clangd/CodeComplete.h:71 /// Get code completions at a specified \p Pos in \p FileName. +/// If `Index` is not nullptr, this will use the index for global code +/// completion; otherwise, use sema code completion by default. I'm not sure you want to spell out this behavior - merging vs replacing is more of an implementation detail Comment at: clangd/SymbolIndex.h:45 + +class SymbolIndex { +public: Interfaces need doc :-) Comment at: clangd/SymbolIndex.h:49 + + virtual llvm::Expected + complete(const CompletionRequest &Req) const = 0; As discussed, we may want a callback-based API that makes it easier for an implementation to manage the lifetime of symbols without copying them. Also is there anything to do with errors other than log and treat as no
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 125962. ioeric added a comment. Diff on https://reviews.llvm.org/D40897 instead origin/master! Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/ClangdIndex.cpp clangd/ClangdIndex.h clangd/ClangdLSPServer.cpp clangd/ClangdLSPServer.h clangd/ClangdServer.cpp clangd/ClangdServer.h clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/ClangdUnitStore.cpp clangd/ClangdUnitStore.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/Symbol.cpp clangd/Symbol.h clangd/SymbolCompletionInfo.cpp clangd/SymbolCompletionInfo.h clangd/SymbolIndex.h clangd/tool/ClangdMain.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -36,15 +36,15 @@ public: SymbolIndexActionFactory() = default; - clang::ASTFrontendAction *create() override { + clang::FrontendAction *create() override { index::IndexingOptions IndexOpts; IndexOpts.SystemSymbolFilter = index::IndexingOptions::SystemSymbolFilterKind::All; IndexOpts.IndexFunctionLocals = false; Collector = std::make_shared(); FrontendAction *Action = index::createIndexingAction(Collector, IndexOpts, nullptr).release(); -return llvm::cast(Action); +return Action; } std::shared_ptr Collector; Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -90,6 +90,15 @@ "Trace internal events and timestamps in chrome://tracing JSON format"), llvm::cl::init(""), llvm::cl::Hidden); +static llvm::cl::opt EnableIndexBasedCompletion( +"enable-index-based-completion", +llvm::cl::desc( +"Enable index-based global code completion (experimental). Clangd will " +"use symbols built from ASTs of opened files and additional indexes " +"(e.g. offline built codebase-wide symbol table) to complete partial " +"symbols."), +llvm::cl::init(false)); + int main(int argc, char *argv[]) { llvm::cl::ParseCommandLineOptions(argc, argv, "clangd"); @@ -170,10 +179,15 @@ clangd::CodeCompleteOptions CCOpts; CCOpts.EnableSnippets = EnableSnippets; CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; + if (EnableIndexBasedCompletion) { +// Disable sema code completion for qualified code completion and use global +// symbol index instead. +CCOpts.IncludeNamespaceLevelDecls = false; + } // Initialize and run ClangdLSPServer. ClangdLSPServer LSPServer(Out, WorkerThreadsCount, StorePreamblesInMemory, -CCOpts, ResourceDirRef, -CompileCommandsDirPath); +CCOpts, ResourceDirRef, CompileCommandsDirPath, +EnableIndexBasedCompletion, {}); constexpr int NoShutdownRequestErrorCode = 1; llvm::set_thread_name("clangd.main"); return LSPServer.run(std::cin) ? 0 : NoShutdownRequestErrorCode; Index: clangd/SymbolIndex.h === --- /dev/null +++ clangd/SymbolIndex.h @@ -0,0 +1,60 @@ +//===--- CompletionIndex.h - Index for code completion ---*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H + +#include "SymbolCompletionInfo.h" +#include "llvm/Support/Error.h" + +namespace clang { +namespace clangd { + +struct CompletionRequest { + std::string Query; + size_t MaxCandidateCount = UINT_MAX; +}; + +/// \brief Signals for scoring completion candidates. +struct ScoreSignals { + // FIXME: add score signals like cross-reference count. +}; + +struct CompletionSymbol { + ScoreSignals Signals; + float SymbolScore; + + std::string USR; + index::SymbolKind Kind; + std::string QualifiedName; + + SymbolCompletionInfo CompletionInfo; +}; + +struct CompletionResult { + std::vector Symbols; + bool AllMatched; +}; + +class SymbolIndex { +public: + virtual ~SymbolIndex() = default; + + virtual llvm::Expected + complete(const CompletionRequest &Req) const = 0; + + // FIXME: add interfaces for more index use cases: + // - Symbol getSymbolInfo(llvm::StringRef USR); + // - getAllOccurrences(llvm::StringRef USR); +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLINDEX_H Index: clangd/SymbolCompletionInfo.h ===
[PATCH] D40548: [clangd] Symbol index interfaces and index-based code completion.
ioeric updated this revision to Diff 125960. ioeric added a comment. - Use IncludeNamespaceLevelDecls option; fix some broken tests. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D40548 Files: clangd/CMakeLists.txt clangd/ClangdIndex.cpp clangd/ClangdIndex.h clangd/ClangdLSPServer.cpp clangd/ClangdLSPServer.h clangd/ClangdServer.cpp clangd/ClangdServer.h clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/ClangdUnitStore.cpp clangd/ClangdUnitStore.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/Symbol.cpp clangd/Symbol.h clangd/SymbolCompletionInfo.cpp clangd/SymbolCompletionInfo.h clangd/SymbolIndex.h clangd/tool/ClangdMain.cpp unittests/clangd/CMakeLists.txt unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- /dev/null +++ unittests/clangd/SymbolCollectorTests.cpp @@ -0,0 +1,110 @@ +//===-- SymbolCollectorTests.cpp ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// + +#include "Symbol.h" +#include "clang/Index/IndexingAction.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemOptions.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +#include +#include + +using testing::ElementsAre; +using testing::Eq; +using testing::Field; + +namespace clang { +namespace clangd { + +namespace { +class SymbolIndexActionFactory : public tooling::FrontendActionFactory { + public: + SymbolIndexActionFactory() = default; + + clang::FrontendAction *create() override { +index::IndexingOptions IndexOpts; +IndexOpts.SystemSymbolFilter = +index::IndexingOptions::SystemSymbolFilterKind::All; +IndexOpts.IndexFunctionLocals = false; +Collector = std::make_shared(); +FrontendAction *Action = +index::createIndexingAction(Collector, IndexOpts, nullptr).release(); +return Action; + } + + std::shared_ptr Collector; +}; + +class SymbolCollectorTest : public ::testing::Test { +public: + bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) { +llvm::IntrusiveRefCntPtr InMemoryFileSystem( +new vfs::InMemoryFileSystem); +llvm::IntrusiveRefCntPtr Files( +new FileManager(FileSystemOptions(), InMemoryFileSystem)); + +const std::string FileName = "symbol.cc"; +const std::string HeaderName = "symbols.h"; +auto Factory = llvm::make_unique(); + +tooling::ToolInvocation Invocation( +{"symbol_collector", "-fsyntax-only", "-std=c++11", FileName}, +Factory->create(), Files.get(), +std::make_shared()); + +InMemoryFileSystem->addFile(HeaderName, 0, +llvm::MemoryBuffer::getMemBuffer(HeaderCode)); + +std::string Content = "#include\"" + std::string(HeaderName) + "\""; +Content += "\n" + MainCode.str(); +InMemoryFileSystem->addFile(FileName, 0, +llvm::MemoryBuffer::getMemBuffer(Content)); +Invocation.run(); +Symbols = Factory->Collector->getSymbols(); +return true; + } + +protected: + std::set Symbols; +}; + +TEST_F(SymbolCollectorTest, CollectSymbol) { + const std::string Header = R"( +class Foo { + void f(); +}; +void f1(); +inline void f2() {} + )"; + const std::string Main = R"( +namespace { +void ff() {} // ignore +} +void f1() {} + )"; + runSymbolCollector(Header, Main); + EXPECT_THAT(Symbols, + UnorderedElementsAre(Field(&Symbol::QualifiedName, Eq("Foo")), + Field(&Symbol::QualifiedName, Eq("Foo::f")), + Field(&Symbol::QualifiedName, Eq("f1")), + Field(&Symbol::QualifiedName, Eq("f2"; +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt === --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -15,6 +15,7 @@ JSONExprTests.cpp TestFS.cpp TraceTests.cpp + SymbolCollectorTests.cpp ) target_link_libraries(ClangdTests Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -90,6 +90,15 @@ "Trace internal events and timestamps in chrome://tracing JSON format"), llvm::cl::init(""),