https://github.com/justincady updated https://github.com/llvm/llvm-project/pull/180285
>From 991944ef6aab28aa7de7f2ff3a4c54cca4804d4d Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 14:51:23 -0500 Subject: [PATCH 01/11] Add --background-index-path-mappings option Add the option and corresponding storage. Mimic the existing path mappings (used for remote indexing) as much as possible. --- clang-tools-extra/clangd/ClangdServer.h | 6 ++++++ clang-tools-extra/clangd/tool/ClangdMain.cpp | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 3ffaf67553dce..e4a52ff682002 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -17,6 +17,7 @@ #include "GlobalCompilationDatabase.h" #include "Hover.h" #include "ModulesBuilder.h" +#include "PathMapping.h" #include "Protocol.h" #include "SemanticHighlighting.h" #include "TUScheduler.h" @@ -199,6 +200,11 @@ class ClangdServer { /// regions in the document. bool PublishInactiveRegions = false; + /// Path mappings applied to background index files on disk. Used to enable + /// sharing of indexes when the client path differs from the path of index + /// generation. + PathMappings BackgroundIndexPathMappings; + explicit operator TUScheduler::Options() const; }; // Sensible default options for use in tests. diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 54af3662470db..f702db03907a1 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -435,6 +435,16 @@ opt<bool> EnableTestScheme{ Hidden, }; +opt<std::string> BackgroundIndexPathMappings{ + "background-index-path-mappings", + cat(Protocol), + desc("Translate clients paths prior to writing background index files to " + "disk. Enables sharing of background index files between clients. " + "Format is identical to --path-mappings. " + "e.g. 
/local/workspace=/TOKEN/workspace"), + init(""), +}; + opt<std::string> PathMappingsArg{ "path-mappings", cat(Protocol), @@ -937,6 +947,15 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var #endif Opts.BackgroundIndex = EnableBackgroundIndex; Opts.BackgroundIndexPriority = BackgroundIndexPriority; + if (!BackgroundIndexPathMappings.empty()) { + auto Mappings = parsePathMappings(BackgroundIndexPathMappings); + if (!Mappings) { + elog("Invalid --background-index-path-mappings: {0}", + Mappings.takeError()); + return 1; + } + Opts.BackgroundIndexPathMappings = std::move(*Mappings); + } Opts.ReferencesLimit = ReferencesLimit; Opts.Rename.LimitFiles = RenameFileLimit; auto PAI = createProjectAwareIndex( >From 0faac6171f9332a9d246847cd49d2918521b64a9 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 14:55:26 -0500 Subject: [PATCH 02/11] Pass mappings to createDiskBackedStorageFactory --- clang-tools-extra/clangd/ClangdServer.cpp | 3 ++- clang-tools-extra/clangd/index/Background.h | 5 ++++- clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index f1a87dd12d905..b1bcd975a1346 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -262,7 +262,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, BackgroundIdx = std::make_unique<BackgroundIndex>( TFS, CDB, BackgroundIndexStorage::createDiskBackedStorageFactory( - [&CDB](llvm::StringRef File) { return CDB.getProjectInfo(File); }), + [&CDB](llvm::StringRef File) { return CDB.getProjectInfo(File); }, + Opts.BackgroundIndexPathMappings), std::move(BGOpts)); AddIndex(BackgroundIdx.get()); } diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 448e911201575..8bbdf72457c0e 100644 --- 
a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H #include "GlobalCompilationDatabase.h" +#include "PathMapping.h" #include "SourceCode.h" #include "index/BackgroundRebuild.h" #include "index/FileIndex.h" @@ -61,8 +62,10 @@ class BackgroundIndexStorage { // CDBDirectory + ".cache/clangd/index/" as the folder to save shards. // CDBDirectory is the first directory containing a CDB in parent directories // of a file, or user cache directory if none was found, e.g. stdlib headers. + // If Mappings are given, paths are remapped before shards are saved to disk. static Factory createDiskBackedStorageFactory( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo); + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings); }; // A priority queue of tasks which can be run on (external) worker threads. diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 470be79590863..048deb4db9332 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "GlobalCompilationDatabase.h" +#include "PathMapping.h" #include "index/Background.h" #include "support/Logger.h" #include "support/Path.h" @@ -150,7 +151,8 @@ class DiskBackedIndexStorageManager { BackgroundIndexStorage::Factory BackgroundIndexStorage::createDiskBackedStorageFactory( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo) { + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings) { return DiskBackedIndexStorageManager(std::move(GetProjectInfo)); } >From 1b91038430041871a72e5f209057919d3b663765 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email 
protected]> Date: Fri, 6 Feb 2026 14:59:47 -0500 Subject: [PATCH 03/11] Pass background mappings to DiskBackedIndexStorage --- .../clangd/index/BackgroundIndexStorage.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 048deb4db9332..3e133fadf9844 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -37,10 +37,12 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, // Uses disk as a storage for index shards. class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; + PathMappings Mappings; public: // Creates `DiskShardRoot` and any parents during construction. - DiskBackedIndexStorage(llvm::StringRef Directory) : DiskShardRoot(Directory) { + DiskBackedIndexStorage(llvm::StringRef Directory, PathMappings Mappings) + : DiskShardRoot(Directory), Mappings(std::move(Mappings)) { std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); if (EC != OK) { @@ -107,9 +109,11 @@ class NullStorage : public BackgroundIndexStorage { class DiskBackedIndexStorageManager { public: DiskBackedIndexStorageManager( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo) + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings) : IndexStorageMapMu(std::make_unique<std::mutex>()), - GetProjectInfo(std::move(GetProjectInfo)) { + GetProjectInfo(std::move(GetProjectInfo)), + Mappings(std::move(Mappings)) { llvm::SmallString<128> FallbackDir; if (llvm::sys::path::cache_directory(FallbackDir)) llvm::sys::path::append(FallbackDir, "clangd", "index"); @@ -136,7 +140,7 @@ class DiskBackedIndexStorageManager { elog("Tried to create storage for empty directory!"); return std::make_unique<NullStorage>(); } - return 
std::make_unique<DiskBackedIndexStorage>(CDBDirectory); + return std::make_unique<DiskBackedIndexStorage>(CDBDirectory, Mappings); } Path FallbackDir; @@ -145,6 +149,7 @@ class DiskBackedIndexStorageManager { std::unique_ptr<std::mutex> IndexStorageMapMu; std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo; + PathMappings Mappings; }; } // namespace @@ -153,7 +158,8 @@ BackgroundIndexStorage::Factory BackgroundIndexStorage::createDiskBackedStorageFactory( std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, PathMappings Mappings) { - return DiskBackedIndexStorageManager(std::move(GetProjectInfo)); + return DiskBackedIndexStorageManager(std::move(GetProjectInfo), + std::move(Mappings)); } } // namespace clangd >From 02641754272b1086bdf4a2a8eab98933901bf640 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:03:48 -0500 Subject: [PATCH 04/11] Add mapping lambdas to DiskBackedIndexStorage The lambda functions will be used in a later commit to transform paths prior to serializing (and deserializing) the background index files. The goal is to reuse existing PathMapping infrastructure as much as possible for background indexing (e.g. PathMapping::Direction being reused). --- .../clangd/index/BackgroundIndexStorage.cpp | 22 +++++++++++++++++++ .../clangd/index/Serialization.h | 4 ++++ 2 files changed, 26 insertions(+) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 3e133fadf9844..1b5e6b3d3f732 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -25,6 +25,15 @@ namespace clang { namespace clangd { namespace { +// Apply path mapping to file URI. Return original URI if no mapping applies. 
+std::string applyPathMappingToURI(llvm::StringRef URI, + PathMapping::Direction Direction, + const PathMappings &Mappings) { + if (auto Mapped = doPathMapping(URI, Direction, Mappings)) + return std::move(*Mapped); + return URI.str(); +} + std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, llvm::StringRef FilePath) { llvm::SmallString<128> ShardRootSS(ShardRoot); @@ -38,11 +47,24 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; PathMappings Mappings; + URITransform LoadTransform; + URITransform StoreTransform; public: // Creates `DiskShardRoot` and any parents during construction. DiskBackedIndexStorage(llvm::StringRef Directory, PathMappings Mappings) : DiskShardRoot(Directory), Mappings(std::move(Mappings)) { + // Background path mappings are specified as /local/path=/canonical/path. + // During load we transform from canonical to local (ServerToClient). + LoadTransform = [this](llvm::StringRef URI) { + return applyPathMappingToURI(URI, PathMapping::Direction::ServerToClient, + this->Mappings); + }; + // During store we transform from local to canonical (ClientToServer). 
+ StoreTransform = [this](llvm::StringRef URI) { + return applyPathMappingToURI(URI, PathMapping::Direction::ClientToServer, + this->Mappings); + }; std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); if (EC != OK) { diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index bf8e036afcb6c..0942155d8e898 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -28,12 +28,16 @@ #include "index/Index.h" #include "index/Symbol.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/Support/Error.h" #include <optional> namespace clang { namespace clangd { +// Used to remap URIs during serialization/deserialization +using URITransform = llvm::unique_function<std::string(llvm::StringRef) const>; + enum class IndexFileFormat { RIFF, // Versioned binary format, suitable for production use. YAML, // Human-readable format, suitable for experiments and debugging. >From 211fea6e33a59e9fa05169d327524b46dd86ac83 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:32:43 -0500 Subject: [PATCH 05/11] Use the mapped URI to compute shard filenames Because the background index files in this mode are intended to be shared between clients with their own unique absolute paths, the shard filenames must be hashed on the _mapped_ URI. This naming only differs when `--background-index-path-mappings` is active. Otherwise, existing clangd background indexes would be invalidated (which we do not want!). A new unit test ensures that hashing is unchanged for the default, non-mapping case. 
--- .../clangd/index/BackgroundIndexStorage.cpp | 25 +++++++++---- .../clangd/unittests/BackgroundIndexTests.cpp | 36 +++++++++++++++++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 1b5e6b3d3f732..eb7ab6f37345f 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -8,6 +8,7 @@ #include "GlobalCompilationDatabase.h" #include "PathMapping.h" +#include "URI.h" #include "index/Background.h" #include "support/Logger.h" #include "support/Path.h" @@ -35,11 +36,22 @@ std::string applyPathMappingToURI(llvm::StringRef URI, } std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, - llvm::StringRef FilePath) { + llvm::StringRef FilePath, + const PathMappings &Mappings) { + std::string HashInput; + if (Mappings.empty()) { + HashInput = FilePath.str(); + } else { + // Hash the mapped URI so that shards are consistently named regardless of + // the path of the generating client + std::string FileURI = URI::createFile(FilePath).toString(); + HashInput = applyPathMappingToURI( + FileURI, PathMapping::Direction::ClientToServer, Mappings); + } llvm::SmallString<128> ShardRootSS(ShardRoot); - llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) + - "." + llvm::toHex(digest(FilePath)) + - ".idx"); + llvm::sys::path::append(ShardRootSS, + llvm::sys::path::filename(FilePath) + "." 
+ + llvm::toHex(digest(HashInput)) + ".idx"); return std::string(ShardRootSS); } @@ -85,7 +97,7 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { std::unique_ptr<IndexFileIn> loadShard(llvm::StringRef ShardIdentifier) const override { const std::string ShardPath = - getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); if (!Buffer) return nullptr; @@ -100,7 +112,8 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { llvm::Error storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const override { - auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + auto ShardPath = + getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); return llvm::writeToOutput(ShardPath, [&Shard](llvm::raw_ostream &OS) { OS << Shard; return llvm::Error::success(); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 0eb4acf0469b7..7f99db011acda 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -2,6 +2,8 @@ #include "CompileCommands.h" #include "Config.h" #include "Headers.h" +#include "PathMapping.h" +#include "SourceCode.h" #include "SyncAPI.h" #include "TestFS.h" #include "TestTU.h" @@ -11,6 +13,8 @@ #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -1031,5 +1035,37 @@ TEST(BackgroundIndex, Profile) { UnorderedElementsAre(Pair("slabs", _), Pair("index", _))); } +// Verify shard filenames are unchanged when no path mappings are used +TEST(BackgroundIndexStorage, 
ShardFilenameUnchangedWithoutPathMappings) { + llvm::SmallString<256> TempDir; + ASSERT_FALSE(llvm::sys::fs::createUniqueDirectory("clangd-test", TempDir)); + llvm::scope_exit Cleanup([&] { llvm::sys::fs::remove_directories(TempDir); }); + + auto Factory = BackgroundIndexStorage::createDiskBackedStorageFactory( + [&](PathRef) -> std::optional<ProjectInfo> { + return ProjectInfo{TempDir.str().str()}; + }, + /*Mappings=*/{}); + + std::string TestFilePath = (TempDir + "/foo.cpp").str(); + BackgroundIndexStorage *Storage = Factory(TestFilePath); + ASSERT_NE(Storage, nullptr); + + // Store a minimal shard to create the file + SymbolSlab::Builder SB; + SymbolSlab Symbols = std::move(SB).build(); + IndexFileOut Shard; + Shard.Symbols = &Symbols; + ASSERT_FALSE(Storage->storeShard(TestFilePath, std::move(Shard))); + + // Shard filename hash must be based on TestFilePath, not a file:// URI + llvm::SmallString<256> ExpectedPath(TempDir); + llvm::sys::path::append(ExpectedPath, ".cache", "clangd", "index", + "foo.cpp." + llvm::toHex(digest(TestFilePath)) + + ".idx"); + EXPECT_TRUE(llvm::sys::fs::exists(ExpectedPath)) + << "Expected shard file not found: " << ExpectedPath; +} + } // namespace clangd } // namespace clang >From a6f3084bbf44ee9ac52e779b4f5c6b9e35558ee0 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:39:18 -0500 Subject: [PATCH 06/11] Apply background index path mapping on load When reading shards from disk, apply any background index path mappings prior to storing URIs in the string table. 
--- .../clangd/index/BackgroundIndexStorage.cpp | 5 +++-- .../clangd/index/Serialization.cpp | 21 ++++++++++++------- .../clangd/index/Serialization.h | 5 ++++- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index eb7ab6f37345f..4f7f8e09f4fac 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -101,8 +101,9 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); if (!Buffer) return nullptr; - if (auto I = - readIndexFile(Buffer->get()->getBuffer(), SymbolOrigin::Background)) + const URITransform *Transform = Mappings.empty() ? nullptr : &LoadTransform; + if (auto I = readIndexFile(Buffer->get()->getBuffer(), + SymbolOrigin::Background, Transform)) return std::make_unique<IndexFileIn>(std::move(*I)); else elog("Error while reading shard {0}: {1}", ShardIdentifier, diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f03839599612c..f2e4e9ec3f14f 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -214,7 +214,8 @@ struct StringTableIn { std::vector<llvm::StringRef> Strings; }; -llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { +llvm::Expected<StringTableIn> +readStringTable(llvm::StringRef Data, const URITransform *Transform = nullptr) { Reader R(Data); size_t UncompressedSize = R.consume32(); if (R.err()) @@ -249,7 +250,12 @@ llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { auto Len = R.rest().find(0); if (Len == llvm::StringRef::npos) return error("Bad string table: not null terminated"); - Table.Strings.push_back(Saver.save(R.consume(Len))); + llvm::StringRef S = R.consume(Len); + // Apply any provided path 
mapping transform to incoming file:// URIs + if (Transform && S.starts_with("file://")) + Table.Strings.push_back(Saver.save((*Transform)(S))); + else + Table.Strings.push_back(Saver.save(S)); R.consume8(); } if (R.err()) @@ -459,8 +465,8 @@ readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) { // data. Later we may want to support some backward compatibility. constexpr static uint32_t Version = 20; -llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, - SymbolOrigin Origin) { +llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, SymbolOrigin Origin, + const URITransform *Transform) { auto RIFF = riff::readFile(Data); if (!RIFF) return RIFF.takeError(); @@ -483,7 +489,7 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, if (!Chunks.count(RequiredChunk)) return error("missing required chunk {0}", RequiredChunk); - auto Strings = readStringTable(Chunks.lookup("stri")); + auto Strings = readStringTable(Chunks.lookup("stri"), Transform); if (!Strings) return Strings.takeError(); @@ -691,9 +697,10 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { } llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data, - SymbolOrigin Origin) { + SymbolOrigin Origin, + const URITransform *Transform) { if (Data.starts_with("RIFF")) { - return readRIFF(Data, Origin); + return readRIFF(Data, Origin, Transform); } if (auto YAMLContents = readYAML(Data, Origin)) { return std::move(*YAMLContents); diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 0942155d8e898..4d6fa81bea1b5 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -54,7 +54,10 @@ struct IndexFileIn { std::optional<tooling::CompileCommand> Cmd; }; // Parse an index file. The input must be a RIFF or YAML file. 
-llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef, SymbolOrigin); +// If Transform is provided, use it to remap all URIs. +llvm::Expected<IndexFileIn> +readIndexFile(llvm::StringRef, SymbolOrigin, + const URITransform *Transform = nullptr); // Specifies the contents of an index file to be written. struct IndexFileOut { >From 2de8816918dccf4969269914de2eb52a3dda6013 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:42:33 -0500 Subject: [PATCH 07/11] Apply background index path mapping on store Before writing background index shards out to disk, apply any path mapping transformations. Note: additional storage was required to hold the transformed URIs. There may be a better method to handle this, but I tried to mirror the existing mechanism used for loading as much as possible. --- .../clangd/index/BackgroundIndexStorage.cpp | 2 ++ clang-tools-extra/clangd/index/Serialization.cpp | 16 ++++++++++++++-- clang-tools-extra/clangd/index/Serialization.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 4f7f8e09f4fac..2f129d3e79017 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -115,6 +115,8 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { IndexFileOut Shard) const override { auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); + if (!Mappings.empty()) + Shard.Transform = &StoreTransform; return llvm::writeToOutput(ShardPath, [&Shard](llvm::raw_ostream &OS) { OS << Shard; return llvm::Error::success(); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f2e4e9ec3f14f..b053453e7ca63 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ 
b/clang-tools-extra/clangd/index/Serialization.cpp @@ -163,12 +163,16 @@ void writeVar(uint32_t I, llvm::raw_ostream &OS) { // These are sorted to improve compression. // Maps each string to a canonical representation. -// Strings remain owned externally (e.g. by SymbolSlab). +// Strings remain owned externally (e.g. by SymbolSlab), except for strings +// that are transformed by path remapping. class StringTableOut { llvm::DenseSet<llvm::StringRef> Unique; std::vector<llvm::StringRef> Sorted; // Since strings are interned, look up can be by pointer. llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index; + llvm::BumpPtrAllocator Arena; + llvm::StringSaver TransformSaver{Arena}; + const URITransform *Transform = nullptr; public: StringTableOut() { @@ -176,8 +180,14 @@ class StringTableOut { // Table size zero is reserved to indicate no compression. Unique.insert(""); } + void setTransform(const URITransform *T) { Transform = T; } // Add a string to the table. Overwrites S if an identical string exists. - void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; }; + // If path remapping is enabled, transform and store the new value. + void intern(llvm::StringRef &S) { + if (Transform && S.starts_with("file://")) + S = TransformSaver.save((*Transform)(S)); + S = *Unique.insert(S).first; + } // Finalize the table and write it to OS. No more strings may be added. 
void finalize(llvm::raw_ostream &OS) { Sorted = {Unique.begin(), Unique.end()}; @@ -576,6 +586,8 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) { RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); StringTableOut Strings; + if (Data.Transform) + Strings.setTransform(Data.Transform); std::vector<Symbol> Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 4d6fa81bea1b5..50f958f8de88f 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -69,6 +69,7 @@ struct IndexFileOut { // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; const tooling::CompileCommand *Cmd = nullptr; + const URITransform *Transform = nullptr; IndexFileOut() = default; IndexFileOut(const IndexFileIn &I) >From 7bf257d9a396e8422927b3c0744f60b9a7a18269 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:47:31 -0500 Subject: [PATCH 08/11] Add lit tests to validate background path mappings 1. Test that shard filenames are based on the mapped URI 2. Test that the on-disk contents contain mapped paths 3. Test that loading on-disk contents reverses the mapping --- .../test/background-index-path-mappings.test | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 clang-tools-extra/clangd/test/background-index-path-mappings.test diff --git a/clang-tools-extra/clangd/test/background-index-path-mappings.test b/clang-tools-extra/clangd/test/background-index-path-mappings.test new file mode 100644 index 0000000000000..5e89d2ebe9fbd --- /dev/null +++ b/clang-tools-extra/clangd/test/background-index-path-mappings.test @@ -0,0 +1,87 @@ +# Use a copy of inputs, as we'll mutate it (as will the background index). 
+# RUN: rm -rf %/t +# RUN: cp -r %/S/Inputs/background-index %/t +# Need to embed the correct temp path in the actual JSON-RPC requests. +# RUN: sed -e "s|DIRECTORY|%/t|" %/t/definition.jsonrpc.tmpl > %/t/definition.jsonrpc.1 +# RUN: sed -e "s|DIRECTORY|%/t|" %/t/compile_commands.json.tmpl > %/t/compile_commands.json +# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." +# (with the extra slash in the front), so we add it here +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t/definition.jsonrpc.1 > %/t/definition.jsonrpc + +# Create the background index files with path mappings +# RUN: clangd -background-index --background-index-path-mappings=%/t=/MAPPED_ROOT -lit-test < %/t/definition.jsonrpc | FileCheck %/t/definition.jsonrpc + +############################################################################### +# 1. Validate shard filenames use the mapped path (not actual path) for hashing +############################################################################### + +# The hash of "file:///MAPPED_ROOT/foo.cpp" is deterministic +# RUN: ls %/t/.cache/clangd/index/foo.cpp.*.idx | FileCheck --check-prefix=MAPPED-HASH %s +# MAPPED-HASH: foo.cpp.BE43CE222BC6EF16.idx + +############################################################################### +# 2. 
Validate shard on-disk contents contain mapped paths, not actual paths +############################################################################### + +# Copy the index file to a known location so we can pass it to dexp +# RUN: cp %/t/.cache/clangd/index/foo.cpp.*.idx %/t/foo.cpp.idx + +# Export the shard to YAML format to validate its contents +# RUN: dexp %/t/foo.cpp.idx -c "export %/t/foo.yaml -format=yaml" + +# RUN: FileCheck --check-prefix=SHARD-CONTENT %s < %/t/foo.yaml + +# Verify that the symbol 'foo' has URIs with /MAPPED_ROOT prefix +# SHARD-CONTENT: --- !Symbol +# SHARD-CONTENT: Name:{{.*}}foo +# SHARD-CONTENT: CanonicalDeclaration: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/sub_dir/foo.h +# SHARD-CONTENT: Definition: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp + +# Verify that IncludeHeaders also uses the mapped path +# SHARD-CONTENT: IncludeHeaders: +# SHARD-CONTENT: - Header:{{.*}}/MAPPED_ROOT/sub_dir/foo.h + +# Verify that Refs use the mapped path +# SHARD-CONTENT: --- !Refs +# SHARD-CONTENT: References: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp + +# Verify that Sources use the mapped path +# SHARD-CONTENT: --- !Source +# SHARD-CONTENT: URI:{{.*}}/MAPPED_ROOT/ + +# Verify that the Cmd section keeps original paths (not mapped), since compile +# commands are machine-specific +# SHARD-CONTENT: --- !Cmd +# SHARD-CONTENT: Directory: +# SHARD-CONTENT-NOT: MAPPED_ROOT + +############################################################################### +# 3. 
Validate loading shards reverses the path mapping to the local path +############################################################################### + +# Create "Client B" directory with a different path but same source content +# RUN: rm -rf %/t2 +# RUN: mkdir -p %/t2 +# RUN: cp -r %/S/Inputs/background-index/* %/t2/ + +# Copy "Client A" index data to "Client B" cache directory +# RUN: mkdir -p %/t2/.cache/clangd/index +# RUN: cp %/t/.cache/clangd/index/*.idx %/t2/.cache/clangd/index/ +# RUN: mkdir -p %/t2/sub_dir/.cache/clangd/index +# RUN: cp %/t/sub_dir/.cache/clangd/index/*.idx %/t2/sub_dir/.cache/clangd/index/ + +# Set up "Client B" compile_commands.json and request file +# RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/compile_commands.json.tmpl > %/t2/compile_commands.json +# RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/definition.jsonrpc.tmpl > %/t2/definition.jsonrpc.1 +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t2/definition.jsonrpc.1 > %/t2/definition.jsonrpc + +# clangd should load "Client A" shards, mapping data to "Client B" local paths. +# Verify both that go-to-definition works (in definition.jsonrpc) and that the +# returned URI points to a "Client B" path. +# RUN: clangd -background-index --background-index-path-mappings=%/t2=/MAPPED_ROOT -lit-test < %/t2/definition.jsonrpc > %/t2/clangd-output.json +# RUN: FileCheck %/t2/definition.jsonrpc < %/t2/clangd-output.json +# RUN: FileCheck --check-prefix=ROUNDTRIP %s -DDIR=%/t2 < %/t2/clangd-output.json +# ROUNDTRIP: "uri": "file://[[DIR]]/foo.cpp" >From 915454622295d3c19039dcded6eac957b2744516 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:49:41 -0500 Subject: [PATCH 09/11] Add Serialization unit tests for path remapping The primary purpose is to ensure if additional fields are added to the on-disk representation and they aren't mapped the test will fail. 
--- .../clangd/unittests/SerializationTests.cpp | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index d18ae478c1653..3ee7bcff958aa 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -444,6 +444,111 @@ TEST(SerializationTest, NoCrashOnBadStringTableSize) { testing::HasSubstr("bytes is implausible")); } +// Verify path remapping is applied to all URI fields during load/store +TEST(SerializationTest, URITransformRoundTrip) { + URITransform WriteTransform = [](llvm::StringRef URI) -> std::string { + std::string S = URI.str(); + size_t Pos = S.find("/original/"); + if (Pos != std::string::npos) + S.replace(Pos, strlen("/original/"), "/transformed/"); + return S; + }; + URITransform ReadTransform = [](llvm::StringRef URI) -> std::string { + std::string S = URI.str(); + size_t Pos = S.find("/transformed/"); + if (Pos != std::string::npos) + S.replace(Pos, strlen("/transformed/"), "/original/"); + return S; + }; + + // Build an index containing "/original/" + Symbol Sym; + Sym.ID = cantFail(SymbolID::fromStr("057557CEBF6E6B2D")); + Sym.Name = "TestFunc"; + Sym.Scope = "ns::"; + Sym.Definition.FileURI = "file:///original/def.cpp"; + Sym.CanonicalDeclaration.FileURI = "file:///original/decl.h"; + Sym.IncludeHeaders.push_back({/*IncludeHeader=*/"file:///original/header.h", + /*References=*/1, + /*SupportedDirectives=*/Symbol::Include}); + Sym.IncludeHeaders.push_back( + {/*IncludeHeader=*/"<system_header>", // Literal, should not be modified + /*References=*/1, + /*SupportedDirectives=*/Symbol::Include}); + + SymbolSlab::Builder SymbolBuilder; + SymbolBuilder.insert(Sym); + SymbolSlab Symbols = std::move(SymbolBuilder).build(); + + Ref R; + R.Location.FileURI = "file:///original/ref.cpp"; + R.Kind = RefKind::Reference; + RefSlab::Builder RefBuilder; + 
RefBuilder.insert(Sym.ID, R); + RefSlab Refs = std::move(RefBuilder).build(); + + IncludeGraph Sources; + IncludeGraphNode IGN; + IGN.URI = "file:///original/source.cpp"; + IGN.Flags = IncludeGraphNode::SourceFlag::IsTU; + IGN.Digest = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}; + IGN.DirectIncludes = {"file:///original/inc1.h", "file:///original/inc2.h"}; + Sources[IGN.URI] = IGN; + + IndexFileOut Out; + Out.Symbols = &Symbols; + Out.Refs = &Refs; + Out.Sources = &Sources; + Out.Format = IndexFileFormat::RIFF; + Out.Transform = &WriteTransform; + std::string Serialized = llvm::to_string(Out); + + // Verify the serialized data only contains "/transformed/". And if new fields + // are added, ensure they aren't missed by path mapping transformation logic. + EXPECT_TRUE(Serialized.find("/transformed/") != std::string::npos) + << "Serialized data should contain transformed URIs"; + EXPECT_TRUE(Serialized.find("/original/") == std::string::npos) + << "Serialized data should NOT contain original URIs"; + + // Deserialize to restore "/original/" + auto In = readIndexFile(Serialized, SymbolOrigin::Background, &ReadTransform); + ASSERT_TRUE(bool(In)) << In.takeError(); + + ASSERT_TRUE(In->Symbols); + auto &ReadSym = *In->Symbols->find(Sym.ID); + EXPECT_EQ(llvm::StringRef(ReadSym.Definition.FileURI), + "file:///original/def.cpp") + << "Symbol.Definition.FileURI not transformed"; + EXPECT_EQ(llvm::StringRef(ReadSym.CanonicalDeclaration.FileURI), + "file:///original/decl.h") + << "Symbol.CanonicalDeclaration.FileURI not transformed"; + ASSERT_EQ(ReadSym.IncludeHeaders.size(), 2u); + EXPECT_EQ(ReadSym.IncludeHeaders[0].IncludeHeader, + "file:///original/header.h") + << "Symbol.IncludeHeaders[0].IncludeHeader not transformed"; + EXPECT_EQ(ReadSym.IncludeHeaders[1].IncludeHeader, "<system_header>") + << "Literal include header should not be modified"; + + ASSERT_TRUE(In->Refs); + ASSERT_EQ(In->Refs->numRefs(), 1u); + auto RefIt = In->Refs->begin(); + EXPECT_EQ(RefIt->first, 
Sym.ID); + ASSERT_EQ(RefIt->second.size(), 1u); + EXPECT_EQ(llvm::StringRef(RefIt->second[0].Location.FileURI), + "file:///original/ref.cpp") + << "Ref.Location.FileURI not transformed"; + + ASSERT_TRUE(In->Sources); + // After load, sources are keyed by the restored URI + auto SourceIt = In->Sources->find("file:///original/source.cpp"); + ASSERT_NE(SourceIt, In->Sources->end()) << "Source URI key not transformed"; + EXPECT_EQ(SourceIt->second.URI, "file:///original/source.cpp") + << "IncludeGraphNode.URI not transformed"; + EXPECT_THAT(SourceIt->second.DirectIncludes, + ElementsAre("file:///original/inc1.h", "file:///original/inc2.h")) + << "IncludeGraphNode.DirectIncludes not transformed"; +} + } // namespace } // namespace clangd } // namespace clang >From 76a54e4bd9de7b2586c6a00ed29266a6d3445592 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Sat, 7 Feb 2026 18:31:37 -0500 Subject: [PATCH 10/11] Fix serialization test failing with zlib present Updated the test to pass on systems with and without zlib installed. --- .../clangd/unittests/SerializationTests.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index 3ee7bcff958aa..499b2f4c6abb3 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -503,14 +503,17 @@ TEST(SerializationTest, URITransformRoundTrip) { Out.Transform = &WriteTransform; std::string Serialized = llvm::to_string(Out); - // Verify the serialized data only contains "/transformed/". And if new fields - // are added, ensure they aren't missed by path mapping transformation logic. 
- EXPECT_TRUE(Serialized.find("/transformed/") != std::string::npos) - << "Serialized data should contain transformed URIs"; - EXPECT_TRUE(Serialized.find("/original/") == std::string::npos) - << "Serialized data should NOT contain original URIs"; - - // Deserialize to restore "/original/" + // Verify path mapping was applied by deserializing without the load + // transform. We cannot search raw bytes as the string table may be + // compressed. + auto Raw = readIndexFile(Serialized, SymbolOrigin::Background); + ASSERT_TRUE(bool(Raw)) << Raw.takeError(); + ASSERT_TRUE(Raw->Symbols); + EXPECT_EQ(llvm::StringRef(Raw->Symbols->find(Sym.ID)->Definition.FileURI), + "file:///transformed/def.cpp") + << "Write transform should have rewritten URIs on disk"; + + // Deserialize with load transform to restore "/original/" auto In = readIndexFile(Serialized, SymbolOrigin::Background, &ReadTransform); ASSERT_TRUE(bool(In)) << In.takeError(); >From dabb5fb8bd295f9c648548312ab4e284a0e8fe80 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Sat, 7 Feb 2026 20:29:06 -0500 Subject: [PATCH 11/11] Normalize path separators in getAbsolutePath Attempt to fix a CI failure exclusive to Windows. Normalize the path separators using the same pattern as FileSystemScheme::getAbsolutePath. 
--- clang-tools-extra/clangd/index/Background.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 17a8097394492..1bc1584563eca 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -74,6 +74,7 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { llvm::sys::path::append(AbsolutePath, Cmd.Filename); llvm::sys::path::remove_dots(AbsolutePath, true); } + llvm::sys::path::native(AbsolutePath); return AbsolutePath; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
