https://github.com/HighCommander4 created https://github.com/llvm/llvm-project/pull/140651
The optimization is not valid, given that the target of a symbol reference can change without the spelling of the reference (or anything else in the file containing the reference) changing, as illustrated in the added test case. Partially fixes https://github.com/clangd/clangd/issues/1104 >From 25593242602e445b184574f76621b9d500c3698e Mon Sep 17 00:00:00 2001 From: Nathan Ridge <zeratul...@hotmail.com> Date: Sat, 17 May 2025 22:11:45 -0400 Subject: [PATCH] [clangd] Drop the optimization where only shards for modified files are updated in the background index The optimization is not valid, given that the target of a symbol reference can change without the spelling of the reference (or anything else in the file containing the reference) changing, as illustrated in the added test case. Partially fixes https://github.com/clangd/clangd/issues/1104 --- clang-tools-extra/clangd/index/Background.cpp | 46 ++--------- clang-tools-extra/clangd/index/Background.h | 9 +-- .../clangd/unittests/BackgroundIndexTests.cpp | 78 ++++++++++++++++++- 3 files changed, 85 insertions(+), 48 deletions(-) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 496d1455def4b..c4bab3c4d4b20 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -180,13 +180,10 @@ void BackgroundIndex::boostRelated(llvm::StringRef Path) { Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile); } -/// Given index results from a TU, only update symbols coming from files that -/// are different or missing from than \p ShardVersionsSnapshot. Also stores new -/// index information on IndexStorage. -void BackgroundIndex::update( - llvm::StringRef MainFile, IndexFileIn Index, - const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, - bool HadErrors) { +/// Given index results from a TU, update symbols coming from all files. Also +/// stores new index information on IndexStorage. +void BackgroundIndex::update(llvm::StringRef MainFile, IndexFileIn Index, + bool HadErrors) { // Keys are URIs. llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; // Note that sources do not contain any information regarding missing headers, @@ -198,12 +195,7 @@ void BackgroundIndex::update( elog("Failed to resolve URI: {0}", AbsPath.takeError()); continue; } - const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath); - // File has different contents, or indexing was successful this time. - if (DigestIt == ShardVersionsSnapshot.end() || - DigestIt->getValue().Digest != IGN.Digest || - (DigestIt->getValue().HadErrors && !HadErrors)) - FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; + FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; } // Shard slabs into files. @@ -263,13 +255,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { return llvm::errorCodeToError(Buf.getError()); auto Hash = digest(Buf->get()->getBuffer()); - // Take a snapshot of the versions to avoid locking for each file in the TU. - llvm::StringMap<ShardVersion> ShardVersionsSnapshot; - { - std::lock_guard<std::mutex> Lock(ShardVersionsMu); - ShardVersionsSnapshot = ShardVersions; - } - vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash)); ParseInputs Inputs; Inputs.TFS = &TFS; @@ -286,25 +271,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { return error("Couldn't build compiler instance"); SymbolCollector::Options IndexOpts; - // Creates a filter to not collect index results from files with unchanged - // digests. - IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, - FileID FID) { - const auto F = SM.getFileEntryRefForID(FID); - if (!F) - return false; // Skip invalid files. - auto AbsPath = getCanonicalPath(*F, SM.getFileManager()); - if (!AbsPath) - return false; // Skip files without absolute path. - auto Digest = digestFile(SM, FID); - if (!Digest) - return false; - auto D = ShardVersionsSnapshot.find(*AbsPath); - if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && - !D->second.HadErrors) - return false; // Skip files that haven't changed, without errors. - return true; - }; IndexOpts.CollectMainFileRefs = true; IndexFileIn Index; @@ -345,7 +311,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { for (auto &It : *Index.Sources) It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; } - update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors); + update(AbsolutePath, std::move(Index), HadErrors); Rebuilder.indexedTU(); return llvm::Error::success(); diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 448e911201575..87a81666e07b7 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -190,12 +190,9 @@ class BackgroundIndex : public SwapIndex { bool HadErrors = false; }; - /// Given index results from a TU, only update symbols coming from files with - /// different digests than \p ShardVersionsSnapshot. Also stores new index - /// information on IndexStorage. - void update(llvm::StringRef MainFile, IndexFileIn Index, - const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, - bool HadErrors); + /// Given index results from a TU, update symbols coming from all files. Also + /// stores new index information on IndexStorage. + void update(llvm::StringRef MainFile, IndexFileIn Index, bool HadErrors); // configuration const ThreadsafeFS &TFS; diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index ada14c9939318..ac7ed654683a2 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -14,6 +14,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include <deque> +#include <optional> #include <thread> using ::testing::_; @@ -213,10 +214,11 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) { CDB.setCompileCommand(testPath("root/B.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); - // B_CC is dropped as we don't collect symbols from A.h in this compilation. + // A_CC is dropped as A.h was now most recently indexed in the context of + // B.cc. EXPECT_THAT(runFuzzyFind(Idx, ""), UnorderedElementsAre(AllOf(named("common"), numReferences(5U)), - AllOf(named("A_CC"), numReferences(0U)), + AllOf(named("B_CC"), numReferences(0U)), AllOf(named("g"), numReferences(1U)), AllOf(named("f_b"), declared(), defined(), numReferences(1U)))); @@ -682,6 +684,78 @@ TEST_F(BackgroundIndexTest, Reindex) { EXPECT_EQ(OldShard, Storage.lookup(testPath("A.cc"))); } +// Test that restarting clangd properly updates the index with changes +// to files since the last time the index was built. +TEST_F(BackgroundIndexTest, UpdateAfterRestart) { + MockFS FS; + llvm::StringMap<std::string> Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + auto CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr); + auto Idx = std::make_unique<BackgroundIndex>( + FS, *CDB, [&](llvm::StringRef) { return &MSS; }, + BackgroundIndex::Options{}); + + // Create a header file containing a function declaration, and a source file + // containing a call to the function. + FS.Files[testPath("test.h")] = R"cpp( + #ifndef TEST_H + #define TEST_H + void waldo(int); + #endif + )cpp"; + FS.Files[testPath("test.cc")] = R"cpp( + #include "test.h" + int main() { + waldo(42); + } + )cpp"; + + // Index the files in this state. + tooling::CompileCommand Cmd; + Cmd.Filename = "../test.cc"; + Cmd.Directory = testPath("build"); + Cmd.CommandLine = {"clang++", "../test.cc", "-fsyntax-only"}; + CDB->setCompileCommand(testPath("test.cc"), Cmd); + ASSERT_TRUE(Idx->blockUntilIdleForTest()); + + // Verify that the function 'waldo' has two references in the index + // (the declaration, and the call site). + auto CheckRefCount = [&](std::string SymbolName) { + auto Syms = runFuzzyFind(*Idx, "waldo"); + EXPECT_THAT(Syms, UnorderedElementsAre(named("waldo"))); + auto Waldo = *Syms.begin(); + llvm::errs() << "Querying refs for symbol with ID " << Waldo.ID.str() + << "\n"; + return getRefs(*Idx, Waldo.ID).numRefs(); + }; + EXPECT_EQ(CheckRefCount("waldo"), 2u); + + // Modify the declaration of 'waldo' in a way that changes its SymbolID + // without changing how existing call sites are written. Here, we add + // a new parameter with a default argument. + FS.Files[testPath("test.h")] = R"cpp( + #ifndef TEST_H + #define TEST_H + void waldo(int, int = 0); + #endif + )cpp"; + + // Simulate clangd shutting down and restarting, and the background index + // being rebuilt after restart. + Idx = nullptr; + CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr); + Idx = std::make_unique<BackgroundIndex>( + FS, *CDB, [&](llvm::StringRef) { return &MSS; }, + BackgroundIndex::Options{}); + CDB->setCompileCommand(testPath("test.cc"), Cmd); + ASSERT_TRUE(Idx->blockUntilIdleForTest()); + + // The rebuild should have updated things so that 'waldo' now again has + // two references in the index. + EXPECT_EQ(CheckRefCount("waldo"), 2u); +} + class BackgroundIndexRebuilderTest : public testing::Test { protected: BackgroundIndexRebuilderTest() _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits