[PATCH] D42942: [clangd] Collect definitions when indexing.
This revision was automatically updated to reflect the committed changes. Closed by commit rCTE324735: [clangd] Collect definitions when indexing. (authored by sammccall, committed by ). Changed prior to commit: https://reviews.llvm.org/D42942?vs=133502=133613#toc Repository: rL LLVM https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/IndexTests.cpp === --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -248,6 +248,28 @@ EXPECT_EQ(M.Detail->Documentation, "--doc--"); } +TEST(MergeTest, PreferSymbolWithDefn) { + Symbol L, R; + Symbol::Details Scratch; + + L.ID = R.ID = SymbolID("hello"); + L.CanonicalDeclaration.FileURI = "file:/left.h"; + R.CanonicalDeclaration.FileURI = "file:/right.h"; + L.CompletionPlainInsertText = "left-insert"; + R.CompletionPlainInsertText = "right-insert"; + + Symbol M = mergeSymbol(L, R, ); + EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/left.h"); + EXPECT_EQ(M.Definition.FileURI, ""); + EXPECT_EQ(M.CompletionPlainInsertText, "left-insert"); + + R.Definition.FileURI = "file:/right.cpp"; // Now right will be favored. + M = mergeSymbol(L, R, ); + EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/right.h"); + EXPECT_EQ(M.Definition.FileURI, "file:/right.cpp"); + EXPECT_EQ(M.CompletionPlainInsertText, "right-insert"); +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -47,12 +47,17 @@ } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } -MATCHER_P(LocationOffsets, Offsets, "") { +MATCHER_P(DeclRange, Offsets, "") { // Offset range in SymbolLocation is [start, end] while in Clangd is [start, // end). + // FIXME: SymbolLocation should be [start, end). return arg.CanonicalDeclaration.StartOffset == Offsets.first && arg.CanonicalDeclaration.EndOffset == Offsets.second - 1; } +MATCHER_P(DefRange, Offsets, "") { + return arg.Definition.StartOffset == Offsets.first && + arg.Definition.EndOffset == Offsets.second - 1; +} namespace clang { namespace clangd { @@ -97,23 +102,18 @@ auto Factory = llvm::make_unique(CollectorOpts); std::vector Args = {"symbol_collector", "-fsyntax-only", - "-std=c++11", TestFileName}; + "-std=c++11", "-include", + TestHeaderName, TestFileName}; Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); tooling::ToolInvocation Invocation( Args, Factory->create(), Files.get(), std::make_shared()); InMemoryFileSystem->addFile(TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - -std::string Content = MainCode; -if (!HeaderCode.empty()) - Content = (llvm::Twine("#include\"") + - llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content) -.str(); InMemoryFileSystem->addFile(TestFileName, 0, -llvm::MemoryBuffer::getMemBuffer(Content)); +llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); return true; @@ -176,6 +176,42 @@ QName("foo::bar::v2"), QName("foo::baz")})); } +TEST_F(SymbolCollectorTest, Locations) { + // FIXME: the behavior here is bad: chopping tokens, including more than the + // ident range, using half-open ranges. See fixmes in getSymbolLocation(). + CollectorOpts.IndexMainFiles = true; + Annotations Header(R"cpp( +// Declared in header, defined in main. +$xdecl[[extern int X]]; +$clsdecl[[class C]]ls; +$printdecl[[void print()]]; + +// Declared in header, defined nowhere. +$zdecl[[extern int Z]]; + )cpp"); + Annotations Main(R"cpp( +$xdef[[int X = 4]]2; +$clsdef[[class Cls {}]]; +$printdef[[void print() {}]] + +// Declared/defined in main only. +$y[[int Y]]; + )cpp"); + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT( + Symbols, + UnorderedElementsAre( + AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")), +DefRange(Main.offsetRange("xdef"))), + AllOf(QName("Cls"),
[PATCH] D42942: [clangd] Collect definitions when indexing.
This revision was automatically updated to reflect the committed changes. Closed by commit rL324735: [clangd] Collect definitions when indexing. (authored by sammccall, committed by ). Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D42942?vs=133502=133612#toc Repository: rL LLVM https://reviews.llvm.org/D42942 Files: clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clang-tools-extra/trunk/clangd/index/Index.cpp clang-tools-extra/trunk/clangd/index/Index.h clang-tools-extra/trunk/clangd/index/Merge.cpp clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp clang-tools-extra/trunk/clangd/index/SymbolCollector.h clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp clang-tools-extra/trunk/clangd/index/SymbolYAML.h clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Index: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp === --- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp @@ -47,12 +47,17 @@ } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } -MATCHER_P(LocationOffsets, Offsets, "") { +MATCHER_P(DeclRange, Offsets, "") { // Offset range in SymbolLocation is [start, end] while in Clangd is [start, // end). + // FIXME: SymbolLocation should be [start, end). return arg.CanonicalDeclaration.StartOffset == Offsets.first && arg.CanonicalDeclaration.EndOffset == Offsets.second - 1; } +MATCHER_P(DefRange, Offsets, "") { + return arg.Definition.StartOffset == Offsets.first && + arg.Definition.EndOffset == Offsets.second - 1; +} namespace clang { namespace clangd { @@ -97,23 +102,18 @@ auto Factory = llvm::make_unique(CollectorOpts); std::vector Args = {"symbol_collector", "-fsyntax-only", - "-std=c++11", TestFileName}; + "-std=c++11", "-include", + TestHeaderName, TestFileName}; Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); tooling::ToolInvocation Invocation( Args, Factory->create(), Files.get(), std::make_shared()); InMemoryFileSystem->addFile(TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - -std::string Content = MainCode; -if (!HeaderCode.empty()) - Content = (llvm::Twine("#include\"") + - llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content) -.str(); InMemoryFileSystem->addFile(TestFileName, 0, -llvm::MemoryBuffer::getMemBuffer(Content)); +llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); return true; @@ -176,6 +176,42 @@ QName("foo::bar::v2"), QName("foo::baz")})); } +TEST_F(SymbolCollectorTest, Locations) { + // FIXME: the behavior here is bad: chopping tokens, including more than the + // ident range, using half-open ranges. See fixmes in getSymbolLocation(). + CollectorOpts.IndexMainFiles = true; + Annotations Header(R"cpp( +// Declared in header, defined in main. +$xdecl[[extern int X]]; +$clsdecl[[class C]]ls; +$printdecl[[void print()]]; + +// Declared in header, defined nowhere. +$zdecl[[extern int Z]]; + )cpp"); + Annotations Main(R"cpp( +$xdef[[int X = 4]]2; +$clsdef[[class Cls {}]]; +$printdef[[void print() {}]] + +// Declared/defined in main only. +$y[[int Y]]; + )cpp"); + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT( + Symbols, + UnorderedElementsAre( + AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")), +DefRange(Main.offsetRange("xdef"))), + AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")), +DefRange(Main.offsetRange("clsdef"))), + AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")), +DefRange(Main.offsetRange("printdef"))), + AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))), + AllOf(QName("Y"), DeclRange(Main.offsetRange("y")), +DefRange(Main.offsetRange("y"); +} + TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) { CollectorOpts.IndexMainFiles = false; runSymbolCollector("class Foo {};", /*Main=*/""); @@ -280,10 +316,9 @@ EXPECT_THAT( Symbols, UnorderedElementsAre( - AllOf(QName("abc_Test"), -
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall marked an inline comment as done. sammccall added inline comments. Comment at: clangd/index/SymbolCollector.cpp:210 + BasicSymbol = addDeclaration(*ND, std::move(ID)); +if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); ioeric wrote: > sammccall wrote: > > hokein wrote: > > > ioeric wrote: > > > > It seems that we store definition even if it's the same as declaration, > > > > which might be true for most classes? This is probably fine, but it > > > > would be worth documenting. Or maybe consider not storing the same > > > > location twice? > > > I think it is fine to store the same location twice. We can't tell > > > whether the CanonicalLoc is a definition or not. > > Documented that these may be the same. > > We wouldn't actually save any memory by avoiding saving this twice - the > > filename is a stringref to the same data, and the offsets get stored even > > if they're null. > I'm less concerned about space. I think this would also make it easy to check > whether a symbol has a separate definition (e.g. we might want to do this for > go-to-definition/go-to-declaration). A comparison would also work but seems > less natural. But I'm not sure, up tp you :) Yeah sometimes we'll definitely want to check if they're different, but other times we'll want to use one without caring if they are. Comparison seems less surprising than either of the fields sometimes being empty even though data is available - that seems likely to lead to bugs I think. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
ioeric accepted this revision. ioeric added a comment. Lg Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67 + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance ) override { +const auto = CI.getInvocation().getFrontendOpts().Inputs; hokein wrote: > ioeric wrote: > > sammccall wrote: > > > hokein wrote: > > > > It is fine for debugging, but I think we don't want this behavior by > > > > default. > > > > > > > > global-symbol-builder also supports running a single TU > > > > (`global-symbol-builder /path/to/file`), which is sufficient for > > > > debugging, I think? > > > > > > > Yeah, I'm not going to check this in, thus the XXX comment :-) > > > > > > Single TU isn't enough - it doesn't test the reducer. On the other hand > > > the full compilation database is too big. So this option would actually > > > be useful! But it doesn't belong here. > > Drive-by: you could also run the tool in the default standalone mode and > > provide a list of files. > However the default standalone mode is not multiple thread :(. You are right :) But I don't think this matters much if you only want to test reduction. Comment at: clangd/index/SymbolCollector.cpp:210 + BasicSymbol = addDeclaration(*ND, std::move(ID)); +if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); sammccall wrote: > hokein wrote: > > ioeric wrote: > > > It seems that we store definition even if it's the same as declaration, > > > which might be true for most classes? This is probably fine, but it would > > > be worth documenting. Or maybe consider not storing the same location > > > twice? > > I think it is fine to store the same location twice. We can't tell whether > > the CanonicalLoc is a definition or not. > Documented that these may be the same. > We wouldn't actually save any memory by avoiding saving this twice - the > filename is a stringref to the same data, and the offsets get stored even if > they're null. I'm less concerned about space. I think this would also make it easy to check whether a symbol has a separate definition (e.g. we might want to do this for go-to-definition/go-to-declaration). A comparison would also work but seems less natural. But I'm not sure, up tp you :) Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
hokein accepted this revision. hokein added a comment. This revision is now accepted and ready to land. LGTM. Comment at: clangd/index/Index.h:129 // - // A C++ symbol could have multiple declarations and one definition (e.g. - // a function is declared in ".h" file, and is defined in ".cc" file). - // * For classes, the canonical declaration is usually definition. - // * For non-inline functions, the canonical declaration is a declaration - // (not a definition), which is usually declared in ".h" file. + // A C++ symbol may have mulitple declarations, and we pick one to prefer. + // * For classes, the canonical declaration should be the definition. a typo here? `multiple`? Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall updated this revision to Diff 133502. sammccall added a comment. Revert hack in global-symbol-builder to filter the input files. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -47,12 +47,17 @@ } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } -MATCHER_P(LocationOffsets, Offsets, "") { +MATCHER_P(DeclRange, Offsets, "") { // Offset range in SymbolLocation is [start, end] while in Clangd is [start, // end). + // FIXME: SymbolLocation should be [start, end). return arg.CanonicalDeclaration.StartOffset == Offsets.first && arg.CanonicalDeclaration.EndOffset == Offsets.second - 1; } +MATCHER_P(DefRange, Offsets, "") { + return arg.Definition.StartOffset == Offsets.first && + arg.Definition.EndOffset == Offsets.second - 1; +} namespace clang { namespace clangd { @@ -97,23 +102,18 @@ auto Factory = llvm::make_unique(CollectorOpts); std::vector Args = {"symbol_collector", "-fsyntax-only", - "-std=c++11", TestFileName}; + "-std=c++11", "-include", + TestHeaderName, TestFileName}; Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); tooling::ToolInvocation Invocation( Args, Factory->create(), Files.get(), std::make_shared()); InMemoryFileSystem->addFile(TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - -std::string Content = MainCode; -if (!HeaderCode.empty()) - Content = (llvm::Twine("#include\"") + - llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content) -.str(); InMemoryFileSystem->addFile(TestFileName, 0, -llvm::MemoryBuffer::getMemBuffer(Content)); +llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); return true; @@ -176,6 +176,42 @@ QName("foo::bar::v2"), QName("foo::baz")})); } +TEST_F(SymbolCollectorTest, Locations) { + // FIXME: the behavior here is bad: chopping tokens, including more than the + // ident range, using half-open ranges. See fixmes in getSymbolLocation(). + CollectorOpts.IndexMainFiles = true; + Annotations Header(R"cpp( +// Declared in header, defined in main. +$xdecl[[extern int X]]; +$clsdecl[[class C]]ls; +$printdecl[[void print()]]; + +// Declared in header, defined nowhere. +$zdecl[[extern int Z]]; + )cpp"); + Annotations Main(R"cpp( +$xdef[[int X = 4]]2; +$clsdef[[class Cls {}]]; +$printdef[[void print() {}]] + +// Declared/defined in main only. +$y[[int Y]]; + )cpp"); + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT( + Symbols, + UnorderedElementsAre( + AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")), +DefRange(Main.offsetRange("xdef"))), + AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")), +DefRange(Main.offsetRange("clsdef"))), + AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")), +DefRange(Main.offsetRange("printdef"))), + AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))), + AllOf(QName("Y"), DeclRange(Main.offsetRange("y")), +DefRange(Main.offsetRange("y"); +} + TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) { CollectorOpts.IndexMainFiles = false; runSymbolCollector("class Foo {};", /*Main=*/""); @@ -280,10 +316,9 @@ EXPECT_THAT( Symbols, UnorderedElementsAre( - AllOf(QName("abc_Test"), -LocationOffsets(Header.offsetRange("expansion")), + AllOf(QName("abc_Test"), DeclRange(Header.offsetRange("expansion")), DeclURI(TestHeaderURI)), - AllOf(QName("Test"), LocationOffsets(Header.offsetRange("spelling")), + AllOf(QName("Test"), DeclRange(Header.offsetRange("spelling")), DeclURI(TestHeaderURI; } @@ -302,13 +337,13 @@ FF2(); )"); runSymbolCollector(/*Header=*/"", Main.code()); - EXPECT_THAT(Symbols, UnorderedElementsAre( -
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall added a comment. Thanks for the comments! And sorry for the delay, I was haunted by useless gtest messages, which https://reviews.llvm.org/D43091 should fix. Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:159 + // Output phase: emit YAML for result symbols. for (const auto : UniqueSymbols) +llvm::outs() << SymbolToYAML(Sym); hokein wrote: > nit: we could get rid of the loop by using `SymbolsToYAML(UniqueSymbols)` . Yeah, but we'd put the whole text form of the index into a single in-memory string, which seems pretty wasteful. Building a YAML output for each seems silly too though. Changed `SymbolsToYAML` to take a `raw_ostream` ref parameter to write to. Comment at: clangd/index/Merge.cpp:93 S.Detail = Scratch; } else if (L.Detail) +/* Current state: S.Detail = O.Detail */; ioeric wrote: > `else if (S.Detail)`? > > `/*Current state: S.Detail = S.Detail*/`? Good catch. Reworked the structure here to avoid needing to name this case, since it was awkward. Comment at: clangd/index/SymbolCollector.cpp:210 + BasicSymbol = addDeclaration(*ND, std::move(ID)); +if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); hokein wrote: > ioeric wrote: > > It seems that we store definition even if it's the same as declaration, > > which might be true for most classes? This is probably fine, but it would > > be worth documenting. Or maybe consider not storing the same location twice? > I think it is fine to store the same location twice. We can't tell whether > the CanonicalLoc is a definition or not. Documented that these may be the same. We wouldn't actually save any memory by avoiding saving this twice - the filename is a stringref to the same data, and the offsets get stored even if they're null. Comment at: clangd/index/SymbolCollector.cpp:236 + std::string FileURI; + // FIXME: we may want a different "canonical" heuristic than clang chooses. + if (auto DeclLoc = getSymbolLocation(ND, SM, Opts, FileURI)) ioeric wrote: > Could you elaborate a bit more on this one? What is the current heuristic, > and what would we prefer? Done a bit, though I don't know the detailed answer to either question. Using forward declarations of classes as canonical indicates that we probably want something better. Comment at: unittests/clangd/SymbolCollectorTests.cpp:108 + "-std=c++11", + "-include", + llvm::sys::path::filename(TestHeaderName), ioeric wrote: > neat! Indeed :-) This is needed because otherwise all the offsets in the annotated testcase are off. I got it slightly wrong though: need the full path to the header name because it's not resolved relative to the main file. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall updated this revision to Diff 133501. sammccall marked 6 inline comments as done. sammccall added a comment. Address review comments. Make SymbolsToYAML take an ostream instead of returning a string. Define SymbolSlab::iterator so googletest will print it as a container. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -47,12 +47,17 @@ } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } -MATCHER_P(LocationOffsets, Offsets, "") { +MATCHER_P(DeclRange, Offsets, "") { // Offset range in SymbolLocation is [start, end] while in Clangd is [start, // end). + // FIXME: SymbolLocation should be [start, end). return arg.CanonicalDeclaration.StartOffset == Offsets.first && arg.CanonicalDeclaration.EndOffset == Offsets.second - 1; } +MATCHER_P(DefRange, Offsets, "") { + return arg.Definition.StartOffset == Offsets.first && + arg.Definition.EndOffset == Offsets.second - 1; +} namespace clang { namespace clangd { @@ -97,23 +102,18 @@ auto Factory = llvm::make_unique(CollectorOpts); std::vector Args = {"symbol_collector", "-fsyntax-only", - "-std=c++11", TestFileName}; + "-std=c++11", "-include", + TestHeaderName, TestFileName}; Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); tooling::ToolInvocation Invocation( Args, Factory->create(), Files.get(), std::make_shared()); InMemoryFileSystem->addFile(TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - -std::string Content = MainCode; -if (!HeaderCode.empty()) - Content = (llvm::Twine("#include\"") + - llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content) -.str(); InMemoryFileSystem->addFile(TestFileName, 0, -llvm::MemoryBuffer::getMemBuffer(Content)); +llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); return true; @@ -176,6 +176,42 @@ QName("foo::bar::v2"), QName("foo::baz")})); } +TEST_F(SymbolCollectorTest, Locations) { + // FIXME: the behavior here is bad: chopping tokens, including more than the + // ident range, using half-open ranges. See fixmes in getSymbolLocation(). + CollectorOpts.IndexMainFiles = true; + Annotations Header(R"cpp( +// Declared in header, defined in main. +$xdecl[[extern int X]]; +$clsdecl[[class C]]ls; +$printdecl[[void print()]]; + +// Declared in header, defined nowhere. +$zdecl[[extern int Z]]; + )cpp"); + Annotations Main(R"cpp( +$xdef[[int X = 4]]2; +$clsdef[[class Cls {}]]; +$printdef[[void print() {}]] + +// Declared/defined in main only. +$y[[int Y]]; + )cpp"); + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT( + Symbols, + UnorderedElementsAre( + AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")), +DefRange(Main.offsetRange("xdef"))), + AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")), +DefRange(Main.offsetRange("clsdef"))), + AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")), +DefRange(Main.offsetRange("printdef"))), + AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))), + AllOf(QName("Y"), DeclRange(Main.offsetRange("y")), +DefRange(Main.offsetRange("y"); +} + TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) { CollectorOpts.IndexMainFiles = false; runSymbolCollector("class Foo {};", /*Main=*/""); @@ -280,10 +316,9 @@ EXPECT_THAT( Symbols, UnorderedElementsAre( - AllOf(QName("abc_Test"), -LocationOffsets(Header.offsetRange("expansion")), + AllOf(QName("abc_Test"), DeclRange(Header.offsetRange("expansion")), DeclURI(TestHeaderURI)), - AllOf(QName("Test"), LocationOffsets(Header.offsetRange("spelling")), + AllOf(QName("Test"), DeclRange(Header.offsetRange("spelling")), DeclURI(TestHeaderURI; } @@ -302,13
[PATCH] D42942: [clangd] Collect definitions when indexing.
hokein added inline comments. Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67 + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance ) override { +const auto = CI.getInvocation().getFrontendOpts().Inputs; ioeric wrote: > sammccall wrote: > > hokein wrote: > > > It is fine for debugging, but I think we don't want this behavior by > > > default. > > > > > > global-symbol-builder also supports running a single TU > > > (`global-symbol-builder /path/to/file`), which is sufficient for > > > debugging, I think? > > > > > Yeah, I'm not going to check this in, thus the XXX comment :-) > > > > Single TU isn't enough - it doesn't test the reducer. On the other hand the > > full compilation database is too big. So this option would actually be > > useful! But it doesn't belong here. > Drive-by: you could also run the tool in the default standalone mode and > provide a list of files. However the default standalone mode is not multiple thread :(. Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:159 + // Output phase: emit YAML for result symbols. for (const auto : UniqueSymbols) +llvm::outs() << SymbolToYAML(Sym); nit: we could get rid of the loop by using `SymbolsToYAML(UniqueSymbols)` . Comment at: clangd/index/SymbolCollector.cpp:137 +// FIXME: EndOffset is inclusive (closed range), and should be exclusive. +// FIXME: Because the underlying ranges are token ranges, this code chops the +//last token in half if it contains multiple characters. That's bad, thanks for finding it. Comment at: clangd/index/SymbolCollector.cpp:210 + BasicSymbol = addDeclaration(*ND, std::move(ID)); +if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); ioeric wrote: > It seems that we store definition even if it's the same as declaration, which > might be true for most classes? This is probably fine, but it would be worth > documenting. Or maybe consider not storing the same location twice? I think it is fine to store the same location twice. We can't tell whether the CanonicalLoc is a definition or not. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
ioeric added inline comments. Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67 + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance ) override { +const auto = CI.getInvocation().getFrontendOpts().Inputs; sammccall wrote: > hokein wrote: > > It is fine for debugging, but I think we don't want this behavior by > > default. > > > > global-symbol-builder also supports running a single TU > > (`global-symbol-builder /path/to/file`), which is sufficient for debugging, > > I think? > > > Yeah, I'm not going to check this in, thus the XXX comment :-) > > Single TU isn't enough - it doesn't test the reducer. On the other hand the > full compilation database is too big. So this option would actually be > useful! But it doesn't belong here. Drive-by: you could also run the tool in the default standalone mode and provide a list of files. Comment at: clangd/index/Merge.cpp:93 S.Detail = Scratch; } else if (L.Detail) +/* Current state: S.Detail = O.Detail */; `else if (S.Detail)`? `/*Current state: S.Detail = S.Detail*/`? Comment at: clangd/index/SymbolCollector.cpp:210 + BasicSymbol = addDeclaration(*ND, std::move(ID)); +if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); It seems that we store definition even if it's the same as declaration, which might be true for most classes? This is probably fine, but it would be worth documenting. Or maybe consider not storing the same location twice? Comment at: clangd/index/SymbolCollector.cpp:236 + std::string FileURI; + // FIXME: we may want a different "canonical" heuristic than clang chooses. + if (auto DeclLoc = getSymbolLocation(ND, SM, Opts, FileURI)) Could you elaborate a bit more on this one? What is the current heuristic, and what would we prefer? Comment at: unittests/clangd/SymbolCollectorTests.cpp:49 MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } +MATCHER_P(CPath, P, "") { return arg.CanonicalDeclaration.FilePath == P; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } Fallout of a git merge? Comment at: unittests/clangd/SymbolCollectorTests.cpp:108 + "-std=c++11", + "-include", + llvm::sys::path::filename(TestHeaderName), neat! Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall added a comment. OK, I think this is good to go now. Rebased against Eric's URI change and added tests. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall updated this revision to Diff 133024. sammccall added a comment. Add tests for SymbolCollector (finding def/decl locations) and merge. Found some bugs in SymbolCollector's locations - added fixmes. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -46,13 +46,19 @@ return arg.CompletionSnippetInsertText == S; } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } +MATCHER_P(CPath, P, "") { return arg.CanonicalDeclaration.FilePath == P; } MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; } -MATCHER_P(LocationOffsets, Offsets, "") { +MATCHER_P(DeclRange, Offsets, "") { // Offset range in SymbolLocation is [start, end] while in Clangd is [start, // end). + // FIXME: SymbolLocation should be [start, end). return arg.CanonicalDeclaration.StartOffset == Offsets.first && arg.CanonicalDeclaration.EndOffset == Offsets.second - 1; } +MATCHER_P(DefRange, Offsets, "") { + return arg.Definition.StartOffset == Offsets.first && + arg.Definition.EndOffset == Offsets.second - 1; +} namespace clang { namespace clangd { @@ -96,24 +102,22 @@ auto Factory = llvm::make_unique(CollectorOpts); -std::vector Args = {"symbol_collector", "-fsyntax-only", - "-std=c++11", TestFileName}; +std::vector Args = {"symbol_collector", + "-fsyntax-only", + "-std=c++11", + "-include", + llvm::sys::path::filename(TestHeaderName), + TestFileName}; Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); tooling::ToolInvocation Invocation( Args, Factory->create(), Files.get(), std::make_shared()); InMemoryFileSystem->addFile(TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - -std::string Content = MainCode; -if (!HeaderCode.empty()) - Content = (llvm::Twine("#include\"") + - llvm::sys::path::filename(TestHeaderName) + "\"\n" + Content) -.str(); InMemoryFileSystem->addFile(TestFileName, 0, -llvm::MemoryBuffer::getMemBuffer(Content)); +llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); return true; @@ -176,6 +180,42 @@ QName("foo::bar::v2"), QName("foo::baz")})); } +TEST_F(SymbolCollectorTest, Locations) { + // FIXME: the behavior here is bad: chopping tokens, including more than the + // ident range, using half-open ranges. See fixmes in getSymbolLocation(). + CollectorOpts.IndexMainFiles = true; + Annotations Header(R"cpp( +// Declared in header, defined in main. +$xdecl[[extern int X]]; +$clsdecl[[class C]]ls; +$printdecl[[void print()]]; + +// Declared in header, defined nowhere. +$zdecl[[extern int Z]]; + )cpp"); + Annotations Main(R"cpp( +$xdef[[int X = 4]]2; +$clsdef[[class Cls {}]]; +$printdef[[void print() {}]] + +// Declared/defined in main only. +$y[[int Y]]; + )cpp"); + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT( + Symbols, + UnorderedElementsAre( + AllOf(QName("X"), DeclRange(Header.offsetRange("xdecl")), +DefRange(Main.offsetRange("xdef"))), + AllOf(QName("Cls"), DeclRange(Header.offsetRange("clsdecl")), +DefRange(Main.offsetRange("clsdef"))), + AllOf(QName("print"), DeclRange(Header.offsetRange("printdecl")), +DefRange(Main.offsetRange("printdef"))), + AllOf(QName("Z"), DeclRange(Header.offsetRange("zdecl"))), + AllOf(QName("Y"), DeclRange(Main.offsetRange("y")), +DefRange(Main.offsetRange("y"); +} + TEST_F(SymbolCollectorTest, SymbolRelativeNoFallback) { CollectorOpts.IndexMainFiles = false; runSymbolCollector("class Foo {};", /*Main=*/""); @@ -280,10 +320,9 @@ EXPECT_THAT( Symbols, UnorderedElementsAre( - AllOf(QName("abc_Test"), -LocationOffsets(Header.offsetRange("expansion")), + AllOf(QName("abc_Test"), DeclRange(Header.offsetRange("expansion")),
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall added a comment. Thanks for comments! (Still not done, adding tests) Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67 + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance ) override { +const auto = CI.getInvocation().getFrontendOpts().Inputs; hokein wrote: > It is fine for debugging, but I think we don't want this behavior by default. > > global-symbol-builder also supports running a single TU > (`global-symbol-builder /path/to/file`), which is sufficient for debugging, I > think? > Yeah, I'm not going to check this in, thus the XXX comment :-) Single TU isn't enough - it doesn't test the reducer. On the other hand the full compilation database is too big. So this option would actually be useful! But it doesn't belong here. Comment at: clangd/index/Index.h:131 SymbolLocation CanonicalDeclaration; + SymbolLocation Definition; hokein wrote: > We might want to update the comment above. I think the comment above is still valid, but added one for definition. Comment at: clangd/index/Merge.cpp:71 + if (!S.CanonicalDeclaration || R.Definition) S.CanonicalDeclaration = R.CanonicalDeclaration; if (S.CompletionLabel == "") ioeric wrote: > Do we also need to copy other information such as completion detail, since > forward declarations usually have minimum symbol information? Done. Now if one of the symbols has a definition and the other doesn't, we prefer the one that does. Otherwise we fall back to preferring L over R. This means we prefer a definition from the global index over a declaration from the local one, which seems OK for now. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall updated this revision to Diff 132985. sammccall marked 3 inline comments as done. sammccall added a comment. [clangd] Prefer data from TUs with symbol defn to data from TUs without. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -461,18 +461,18 @@ ... )"; - auto Symbols1 = SymbolFromYAML(YAML1); + auto Symbols1 = SymbolsFromYAML(YAML1); EXPECT_THAT(Symbols1, UnorderedElementsAre( AllOf(QName("clang::Foo1"), Labeled("Foo1-label"), Doc("Foo doc"), Detail("int"; - auto Symbols2 = SymbolFromYAML(YAML2); + auto Symbols2 = SymbolsFromYAML(YAML2); EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(QName("clang::Foo2"), Labeled("Foo2-label"), Not(HasDetail(); std::string ConcatenatedYAML = SymbolsToYAML(Symbols1) + SymbolsToYAML(Symbols2); - auto ConcatenatedSymbols = SymbolFromYAML(ConcatenatedYAML); + auto ConcatenatedSymbols = SymbolsFromYAML(ConcatenatedYAML); EXPECT_THAT(ConcatenatedSymbols, UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2"))); Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -37,7 +37,7 @@ llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; return nullptr; } - auto Slab = SymbolFromYAML(Buffer.get()->getBuffer()); + auto Slab = SymbolsFromYAML(Buffer.get()->getBuffer()); SymbolSlab::Builder SymsBuilder; for (auto Sym : Slab) SymsBuilder.insert(Sym); Index: clangd/index/SymbolYAML.h === --- clangd/index/SymbolYAML.h +++ clangd/index/SymbolYAML.h @@ -25,7 +25,11 @@ namespace clangd { // Read symbols from a YAML-format string. -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent); +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent); + +// Read one symbol from a YAML-format string, backed by the arena. +Symbol SymbolFromYAML(llvm::StringRef YAMLContent, + llvm::BumpPtrAllocator ); // Convert a single symbol to YAML-format string. // The YAML result is safe to concatenate. Index: clangd/index/SymbolYAML.cpp === --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -97,7 +97,9 @@ IO.mapRequired("Name", Sym.Name); IO.mapRequired("Scope", Sym.Scope); IO.mapRequired("SymInfo", Sym.SymInfo); -IO.mapRequired("CanonicalDeclaration", Sym.CanonicalDeclaration); +IO.mapOptional("CanonicalDeclaration", Sym.CanonicalDeclaration, + SymbolLocation()); +IO.mapOptional("Definition", Sym.Definition, SymbolLocation()); IO.mapRequired("CompletionLabel", Sym.CompletionLabel); IO.mapRequired("CompletionFilterText", Sym.CompletionFilterText); IO.mapRequired("CompletionPlainInsertText", Sym.CompletionPlainInsertText); @@ -160,7 +162,7 @@ namespace clang { namespace clangd { -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent) { +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent) { // Store data of pointer fields (excl. `StringRef`) like `Detail`. llvm::BumpPtrAllocator Arena; llvm::yaml::Input Yin(YAMLContent, ); @@ -173,6 +175,14 @@ return std::move(Syms).build(); } +Symbol SymbolFromYAML(llvm::StringRef YAMLContent, + llvm::BumpPtrAllocator ) { + llvm::yaml::Input Yin(YAMLContent, ); + Symbol S; + Yin >> S; + return S; +} + std::string SymbolsToYAML(const SymbolSlab& Symbols) { std::string Str; llvm::raw_string_ostream OS(Str); Index: clangd/index/SymbolCollector.h === --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -53,6 +53,9 @@ SymbolSlab takeSymbols() { return std::move(Symbols).build(); } private: + const Symbol *addDeclaration(const NamedDecl &, SymbolID); + void addDefinition(const NamedDecl &, const Symbol ); + // All Symbols collected from the AST. SymbolSlab::Builder Symbols; ASTContext *ASTCtx; Index: clangd/index/SymbolCollector.cpp === ---
[PATCH] D42942: [clangd] Collect definitions when indexing.
hokein added a comment. I like where the patch is going now. Comment at: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67 + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance ) override { +const auto = CI.getInvocation().getFrontendOpts().Inputs; It is fine for debugging, but I think we don't want this behavior by default. global-symbol-builder also supports running a single TU (`global-symbol-builder /path/to/file`), which is sufficient for debugging, I think? Comment at: clangd/index/Index.h:131 SymbolLocation CanonicalDeclaration; + SymbolLocation Definition; We might want to update the comment above. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
ioeric added a comment. looks good Comment at: clangd/index/Index.h:25 struct SymbolLocation { // The absolute path of the source file where a symbol occurs. It might be worth mentioning here whether the range covers the entire declaration/definition body, or just the symbol identifier. Comment at: clangd/index/Merge.cpp:71 + if (!S.CanonicalDeclaration || R.Definition) S.CanonicalDeclaration = R.CanonicalDeclaration; if (S.CompletionLabel == "") Do we also need to copy other information such as completion detail, since forward declarations usually have minimum symbol information? Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall added a comment. This does seem to get at least the simple cases right: ID: 0EF8AF4D08B11EBF3FFB8004CE702991B15F Name:SymbolsFromYAML Scope: 'clang::clangd::' SymInfo: Kind:Function Lang:C CanonicalDeclaration: StartOffset: 956 EndOffset: 1010 FilePath: /usr/local/google/home/sammccall/src/llvm/tools/clang/tools/extra/clangd/index/SymbolYAML.h Definition: StartOffset: 4999 EndOffset: 5348 FilePath: /usr/local/google/home/sammccall/src/llvm/tools/clang/tools/extra/clangd/index/SymbolYAML.cpp CompletionLabel: 'SymbolsFromYAML(llvm::StringRef YAMLContent)' CompletionFilterText: SymbolsFromYAML CompletionPlainInsertText: SymbolsFromYAML CompletionSnippetInsertText: 'SymbolsFromYAML(${1:llvm::StringRef YAMLContent})' Detail: Documentation: '' CompletionDetail: SymbolSlab Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42942: [clangd] Collect definitions when indexing.
sammccall created this revision. sammccall added reviewers: ioeric, hokein. Herald added subscribers: cfe-commits, jkorous-apple, ilya-biryukov, klimek. (This isn't done! Needs new tests, and some random cleanups should be split out. Looking for some early feedback.) Within a TU: - as now, collect a declaration from the first occurrence of a symbol (taking clang's canonical declaration) - when we first see a definition occurrence, copy the symbol and add it Across TUs/sources: - mergeSymbol in Merge.h is responsible for combining matching Symbols. This covers dynamic/static merges and cross-TU merges in the static index. - it prefers declarations from Symbols that have a definition. - GlobalSymbolBuilderMain is modified to use mergeSymbol as a reduce step. Random cleanups (can be pulled out): - SymbolFromYAML -> SymbolsFromYAML, new singular SymbolFromYAML added - avoid uninit'd SymbolLocations. Add an idiomatic way to check "absent". - CanonicalDeclaration (as well as Definition) are mapped as optional in YAML. - added operator<< for Symbol & SymbolLocation, for debugging Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D42942 Files: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/index/SymbolYAML.h clangd/tool/ClangdMain.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -439,18 +439,18 @@ ... )"; - auto Symbols1 = SymbolFromYAML(YAML1); + auto Symbols1 = SymbolsFromYAML(YAML1); EXPECT_THAT(Symbols1, UnorderedElementsAre( AllOf(QName("clang::Foo1"), Labeled("Foo1-label"), Doc("Foo doc"), Detail("int"; - auto Symbols2 = SymbolFromYAML(YAML2); + auto Symbols2 = SymbolsFromYAML(YAML2); EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(QName("clang::Foo2"), Labeled("Foo2-label"), Not(HasDetail(); std::string ConcatenatedYAML = SymbolsToYAML(Symbols1) + SymbolsToYAML(Symbols2); - auto ConcatenatedSymbols = SymbolFromYAML(ConcatenatedYAML); + auto ConcatenatedSymbols = SymbolsFromYAML(ConcatenatedYAML); EXPECT_THAT(ConcatenatedSymbols, UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2"))); Index: clangd/tool/ClangdMain.cpp === --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -37,7 +37,7 @@ llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; return nullptr; } - auto Slab = SymbolFromYAML(Buffer.get()->getBuffer()); + auto Slab = SymbolsFromYAML(Buffer.get()->getBuffer()); SymbolSlab::Builder SymsBuilder; for (auto Sym : Slab) SymsBuilder.insert(Sym); Index: clangd/index/SymbolYAML.h === --- clangd/index/SymbolYAML.h +++ clangd/index/SymbolYAML.h @@ -25,7 +25,11 @@ namespace clangd { // Read symbols from a YAML-format string. -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent); +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent); + +// Read one symbol from a YAML-format string, backed by the arena. +Symbol SymbolFromYAML(llvm::StringRef YAMLContent, + llvm::BumpPtrAllocator ); // Convert a single symbol to YAML-format string. // The YAML result is safe to concatenate. Index: clangd/index/SymbolYAML.cpp === --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -97,7 +97,9 @@ IO.mapRequired("Name", Sym.Name); IO.mapRequired("Scope", Sym.Scope); IO.mapRequired("SymInfo", Sym.SymInfo); -IO.mapRequired("CanonicalDeclaration", Sym.CanonicalDeclaration); +IO.mapOptional("CanonicalDeclaration", Sym.CanonicalDeclaration, + SymbolLocation()); +IO.mapOptional("Definition", Sym.Definition, SymbolLocation()); IO.mapRequired("CompletionLabel", Sym.CompletionLabel); IO.mapRequired("CompletionFilterText", Sym.CompletionFilterText); IO.mapRequired("CompletionPlainInsertText", Sym.CompletionPlainInsertText); @@ -160,7 +162,7 @@ namespace clang { namespace clangd { -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent) { +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent) { // Store data of pointer fields (excl. `StringRef`) like `Detail`. llvm::BumpPtrAllocator Arena; llvm::yaml::Input Yin(YAMLContent, ); @@ -173,6 +175,14 @@ return std::move(Syms).build(); } +Symbol