Author: Justin Cady Date: 2026-03-24T11:21:06-04:00 New Revision: c1ea10a854dcbbb50c954ebb87246d822cd01e21
URL: https://github.com/llvm/llvm-project/commit/c1ea10a854dcbbb50c954ebb87246d822cd01e21 DIFF: https://github.com/llvm/llvm-project/commit/c1ea10a854dcbbb50c954ebb87246d822cd01e21.diff LOG: [clangd] Add background index path remapping tool (#185482) Introduce a standalone tool to remap paths inside clangd background index files. This allows users to pay the cost of generating a full background index once, then reuse the background index across multiple clients. Each client rewrites the background index in place after copying the original, specifying the path(s) to be remapped. This is an alternative approach to the dynamic path remapping proposed in #180285. Fixes clangd/clangd#847 Assisted-by: claude Added: clang-tools-extra/clangd/remap/CMakeLists.txt clang-tools-extra/clangd/remap/RemapMain.cpp clang-tools-extra/clangd/test/remap.test Modified: clang-tools-extra/clangd/CMakeLists.txt clang-tools-extra/clangd/test/CMakeLists.txt Removed: ################################################################################ diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 4d8649dbdcb09..a0e8036038d30 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -201,6 +201,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") endif() add_subdirectory(tool) add_subdirectory(indexer) +add_subdirectory(remap) if (LLVM_INCLUDE_BENCHMARKS) add_subdirectory(benchmarks) diff --git a/clang-tools-extra/clangd/remap/CMakeLists.txt b/clang-tools-extra/clangd/remap/CMakeLists.txt new file mode 100644 index 0000000000000..ff8adb0d72f64 --- /dev/null +++ b/clang-tools-extra/clangd/remap/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_executable(clangd-remap + RemapMain.cpp + ) + +clang_target_link_libraries(clangd-remap + PRIVATE + clangBasic + ) +target_link_libraries(clangd-remap + PRIVATE + clangDaemon + clangdSupport + ) diff --git 
a/clang-tools-extra/clangd/remap/RemapMain.cpp b/clang-tools-extra/clangd/remap/RemapMain.cpp new file mode 100644 index 0000000000000..8d0a44ec0c587 --- /dev/null +++ b/clang-tools-extra/clangd/remap/RemapMain.cpp @@ -0,0 +1,345 @@
+//===--- RemapMain.cpp - Remap paths in background index shards -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// clangd-remap is a standalone tool that rewrites paths inside every .idx shard
+// in a background index directory. An index generated on one machine (or at one
+// workspace path) can be remapped and reused within a source tree at a
+// different location.
+//
+// Usage:
+// clangd-remap --path-mappings=/old/root=/new/root /path/to/index-dir
+//
+//===----------------------------------------------------------------------===//
+
+#include "Headers.h"
+#include "PathMapping.h"
+#include "SourceCode.h"
+#include "URI.h"
+#include "index/Ref.h"
+#include "index/Serialization.h"
+#include "index/Symbol.h"
+#include "support/Logger.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+#include <atomic>
+#include <optional>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// Category used to group this tool's command-line options.
+static llvm::cl::OptionCategory RemapCategory("clangd-remap options");
+
+// Required: the comma-separated old=new path mappings to apply.
+static llvm::cl::opt<std::string> PathMappingsArg{
+    "path-mappings",
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::desc(
+        "List of path mappings applied to every string in each background "
+        "index shard. Format: /old/path=/new/path[,/old2=/new2,...]"),
+    llvm::cl::Required,
+};
+
+// Required positional argument: the directory that is scanned for .idx shards.
+static llvm::cl::opt<std::string> IndexDir{
+    llvm::cl::desc("<index-dir>"),
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::Positional,
+    llvm::cl::Required,
+};
+
+// Optional worker-thread count; defaults to 0.
+static llvm::cl::opt<unsigned> NumThreads{
+    "j",
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::desc("Number of worker threads (0 = all)"),
+    llvm::cl::init(0),
+};
+
+// Optional log verbosity; defaults to Logger::Info.
+static llvm::cl::opt<Logger::Level> LogLevel{
+    "log",
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::desc("Verbosity of log messages written to stderr"),
+    llvm::cl::values(
+        clEnumValN(Logger::Error, "error", "Error messages only"),
+        clEnumValN(Logger::Info, "info", "High level execution tracing"),
+        clEnumValN(Logger::Debug, "verbose", "Low level details")),
+    llvm::cl::init(Logger::Info),
+};
+
+// Apply a path mapping to a URI or raw path string
+//
+// Ex. given "-I/old/root/include" and mapping /old/root=/new/root, the result
+// is "-I/new/root/include"
+//
+// Strings starting with "file://" are handed to doPathMapping() and its result
+// is returned as-is. For any other string, the mappings are tried in order and
+// the first one that matches wins. Returns std::nullopt when the string
+// contains no '/' or no mapping matches.
+std::optional<std::string> remapString(llvm::StringRef S,
+                                       const PathMappings &Mappings) {
+  // Client = old path, Server = new path; ClientToServer maps old -> new
+  if (S.starts_with("file://"))
+    return doPathMapping(S, PathMapping::Direction::ClientToServer, Mappings);
+
+  // For non-URI strings (compilation flags, directory paths, etc.) only match
+  // at the first '/' (where an absolute path begins)
+  // FIXME: This does not handle Windows paths; only POSIX paths are supported.
+  size_t FirstSlash = S.find('/');
+  if (FirstSlash == llvm::StringRef::npos)
+    return std::nullopt;
+
+  for (const auto &Mapping : Mappings) {
+    size_t Pos = S.find(Mapping.ClientPath);
+    if (Pos == FirstSlash) {
+      llvm::StringRef After = S.substr(Pos + Mapping.ClientPath.size());
+      // Ensure a full path-component match: "/old" must not match "/older"
+      if (After.empty() || After.front() == '/')
+        // Keep whatever preceded the path (e.g. "-I") and whatever follows it
+        return (S.substr(0, Pos) + Mapping.ServerPath + After).str();
+    }
+  }
+  return std::nullopt;
+}
+
+// Remap a StringRef in-place, saving the result into the Arena so the
+// pointer remains valid. S is left untouched when no mapping applies.
+void remapRef(llvm::StringRef &S, const PathMappings &Mappings,
+              llvm::StringSaver &Saver) {
+  if (auto R = remapString(S, Mappings))
+    S = Saver.save(std::move(*R));
+}
+
+// Like remapRef, but _always_ saves into Saver (even on no match). Used for
+// StringRefs that will outlive their original storage.
+void remapOrCopyRef(llvm::StringRef &S, const PathMappings &Mappings,
+                    llvm::StringSaver &Saver) {
+  if (auto R = remapString(S, Mappings))
+    S = Saver.save(std::move(*R));
+  else
+    S = Saver.save(S);
+}
+
+// Remap a string held as a raw character pointer. P is read as a C string; if
+// a mapping applies, P is redirected to a copy of the result owned by Saver,
+// otherwise P is left untouched.
+void remapCharURI(const char *&P, const PathMappings &Mappings,
+                  llvm::StringSaver &Saver) {
+  llvm::StringRef S(P);
+  if (auto R = remapString(S, Mappings))
+    P = Saver.save(std::move(*R)).data();
+}
+
+// Remap an owning std::string in-place; S is left untouched when no mapping
+// applies.
+void remapStdStr(std::string &S, const PathMappings &Mappings) {
+  if (auto R = remapString(S, Mappings))
+    S = std::move(*R);
+}
+
+// Recursively walk Dir and return the path of every entry whose extension is
+// ".idx". If iteration reports an error, it is logged and the paths gathered
+// up to that point are returned.
+std::vector<std::string> collectShards(llvm::StringRef Dir) {
+  std::vector<std::string> Paths;
+  std::error_code EC;
+  for (llvm::sys::fs::recursive_directory_iterator It(Dir, EC), End;
+       It != End && !EC; It.increment(EC)) {
+    if (llvm::sys::path::extension(It->path()) == ".idx")
+      Paths.push_back(It->path());
+  }
+  if (EC)
+    elog("Error scanning directory {0}: {1}", Dir, EC.message());
+  return Paths;
+}
+
+// Compute shard filename for a source path. (See getShardPathFromFilePath()
+// in BackgroundIndexStorage.cpp.)
+std::string shardName(llvm::StringRef SourceFilePath) {
+  // Layout: "<file name>.<hex of digest(SourceFilePath)>.idx"
+  std::string Name = llvm::sys::path::filename(SourceFilePath).str();
+  Name += ".";
+  Name += llvm::toHex(digest(SourceFilePath));
+  Name += ".idx";
+  return Name;
+}
+
+// Work out what a shard file should be called once its paths are remapped.
+//
+// Every source URI recorded in the shard is parsed and resolved to a path. The
+// entry whose shardName() equals OldFilename is taken to be the file this shard
+// was written for; its path is run through the mappings and the shard name is
+// recomputed from the result. Entries whose URI cannot be parsed or resolved
+// are skipped. OldFilename is returned unchanged when the shard carries no
+// source entries or none of them reproduces OldFilename.
+//
+// Call this before remapIndexData(): the lookup depends on the URIs still being
+// the original (not remapped) ones.
+std::string deriveNewFilename(const IndexFileIn &Data,
+                              llvm::StringRef OldFilename,
+                              const PathMappings &Mappings) {
+  if (Data.Sources) {
+    for (const auto &Source : *Data.Sources) {
+      auto ParsedURI = URI::parse(Source.getKey());
+      if (!ParsedURI) {
+        llvm::consumeError(ParsedURI.takeError());
+        continue;
+      }
+
+      auto AbsPath = URI::resolve(*ParsedURI);
+      if (!AbsPath) {
+        llvm::consumeError(AbsPath.takeError());
+        continue;
+      }
+
+      // Not the file this shard is named after; keep looking
+      if (shardName(*AbsPath) != OldFilename)
+        continue;
+
+      std::string Remapped = *AbsPath;
+      remapStdStr(Remapped, Mappings);
+      return shardName(Remapped);
+    }
+  }
+  return OldFilename.str();
+}
+
+// Remap all paths inside a parsed IndexFileIn in-place. Saver is used to
+// allocate new strings for fields stored as StringRef or raw pointers.
+void remapIndexData(IndexFileIn &Data, const PathMappings &Mappings,
+                    llvm::StringSaver &Saver) {
+  if (Data.Symbols) {
+    // SymbolSlab is immutable, so we rebuild it
+    SymbolSlab::Builder Builder;
+    for (const auto &Sym : *Data.Symbols) {
+      Symbol S = Sym; // mutable copy
+      remapCharURI(S.CanonicalDeclaration.FileURI, Mappings, Saver);
+      remapCharURI(S.Definition.FileURI, Mappings, Saver);
+      for (auto &Inc : S.IncludeHeaders)
+        remapRef(Inc.IncludeHeader, Mappings, Saver);
+      Builder.insert(S);
+    }
+    Data.Symbols = std::move(Builder).build();
+  }
+
+  if (Data.Refs) {
+    // Same approach as for symbols: copy each ref, remap it, and rebuild
+    RefSlab::Builder Builder;
+    for (const auto &Entry : *Data.Refs) {
+      for (const auto &R : Entry.second) {
+        Ref MR = R; // mutable copy
+        remapCharURI(MR.Location.FileURI, Mappings, Saver);
+        Builder.insert(Entry.first, MR);
+      }
+    }
+    Data.Refs = std::move(Builder).build();
+  }
+
+  // We must rebuild the StringMap because keys may change. All StringRef
+  // fields (URI, DirectIncludes) are saved into Saver because the old
+  // StringMap is destroyed below.
+  if (Data.Sources) {
+    IncludeGraph NewSources;
+    for (auto &Entry : *Data.Sources) {
+      IncludeGraphNode IGN = Entry.getValue();
+      remapOrCopyRef(IGN.URI, Mappings, Saver);
+      for (auto &Inc : IGN.DirectIncludes)
+        remapOrCopyRef(Inc, Mappings, Saver);
+      // The node is keyed by its (possibly remapped) URI
+      NewSources[IGN.URI] = std::move(IGN);
+    }
+    Data.Sources = std::move(NewSources);
+  }
+
+  if (Data.Cmd) {
+    // The compile command holds owning std::strings, so no Saver is needed
+    remapStdStr(Data.Cmd->Directory, Mappings);
+    for (auto &Arg : Data.Cmd->CommandLine)
+      remapStdStr(Arg, Mappings);
+    remapStdStr(Data.Cmd->Filename, Mappings);
+  }
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
+
+// Tool entry point. Parses the command line, collects every .idx shard under
+// the index directory and rewrites each one in parallel. A shard is written
+// under the name computed by deriveNewFilename(); if that differs from the
+// current name, the old file is removed afterwards.
+//
+// Returns 1 if the path mappings are invalid or empty, or if any shard could
+// not be read, parsed, written or (when renamed) have its old file removed.
+// Returns 0 otherwise, including when no shards were found.
+int main(int Argc, const char **Argv) {
+  using namespace clang::clangd;
+
+  llvm::sys::PrintStackTraceOnErrorSignal(Argv[0]);
+  llvm::cl::HideUnrelatedOptions(RemapCategory);
+  llvm::cl::ParseCommandLineOptions(Argc, Argv,
+                                    "clangd-remap: rewrite paths inside "
+                                    "background-index .idx shards\n");
+
+  StreamLogger Logger(llvm::errs(), LogLevel);
+  LoggingSession LoggingSession(Logger);
+
+  auto Mappings = parsePathMappings(PathMappingsArg);
+  if (!Mappings) {
+    elog("Invalid --path-mappings: {0}", Mappings.takeError());
+    return 1;
+  }
+  if (Mappings->empty()) {
+    elog("No path mappings specified.");
+    return 1;
+  }
+
+  // Gather all shard files from the index directory.
+  auto AllShards = collectShards(IndexDir);
+  if (AllShards.empty()) {
+    log("No .idx files found in the specified directories.");
+    return 0;
+  }
+
+  log("Found {0} shard(s) to process.", AllShards.size());
+  for (const auto &M : *Mappings)
+    log(" Path mapping: {0}", M);
+
+  // Only override the parallel strategy when a thread count was requested
+  if (NumThreads.getValue() != 0)
+    llvm::parallel::strategy = llvm::hardware_concurrency(NumThreads);
+
+  // Updated concurrently by the per-shard workers below
+  std::atomic<unsigned> Errors{0};
+  std::atomic<unsigned> FilesRenamed{0};
+  std::atomic<unsigned> FilesUnchanged{0};
+
+  llvm::parallelFor(0, AllShards.size(), [&](size_t I) {
+    const std::string &ShardPath = AllShards[I];
+
+    auto Buf = llvm::MemoryBuffer::getFile(ShardPath);
+    if (!Buf) {
+      elog("Cannot read {0}: {1}", ShardPath, Buf.getError().message());
+      ++Errors;
+      return;
+    }
+
+    auto Parsed = readIndexFile((*Buf)->getBuffer(), SymbolOrigin::Background);
+    if (!Parsed) {
+      elog("Cannot parse {0}: {1}", ShardPath, Parsed.takeError());
+      ++Errors;
+      return;
+    }
+
+    // Derive the new shard filename before remapping, so we can match
+    // against original (un-remapped) source URIs.
+    llvm::StringRef OldFilename = llvm::sys::path::filename(ShardPath);
+    std::string NewFilename =
+        deriveNewFilename(*Parsed, OldFilename, *Mappings);
+
+    // Remap all paths in the parsed data. Arena owns the remapped strings and
+    // must stay alive until the shard has been written out below.
+    llvm::BumpPtrAllocator Arena;
+    llvm::StringSaver Saver(Arena);
+    remapIndexData(*Parsed, *Mappings, Saver);
+
+    // Write the remapped shard (possibly under a new name)
+    llvm::StringRef ParentDir = llvm::sys::path::parent_path(ShardPath);
+    llvm::SmallString<256> NewPath(ParentDir);
+    llvm::sys::path::append(NewPath, NewFilename);
+    if (auto Err = llvm::writeToOutput(NewPath, [&](llvm::raw_ostream &OS) {
+          IndexFileOut Out(*Parsed);
+          Out.Format = IndexFileFormat::RIFF;
+          OS << Out;
+          return llvm::Error::success();
+        })) {
+      elog("Cannot write {0}: {1}", NewPath, std::move(Err));
+      ++Errors;
+      return;
+    }
+
+    // If the filename changed, remove the old shard. A failed removal leaves
+    // the stale shard next to the new one, so report it instead of silently
+    // counting the shard as renamed.
+    if (NewFilename != OldFilename) {
+      if (std::error_code EC = llvm::sys::fs::remove(ShardPath)) {
+        elog("Cannot remove {0}: {1}", ShardPath, EC.message());
+        ++Errors;
+        return;
+      }
+      ++FilesRenamed;
+    } else
+      ++FilesUnchanged;
+  });
+
+  unsigned Renamed = FilesRenamed.load();
+  unsigned Unchanged = FilesUnchanged.load();
+  log("Processed: {0} shard(s), {1} renamed, {2} unchanged, {3} error(s).",
+      Renamed + Unchanged, Renamed, Unchanged, Errors.load());
+  return Errors.load() > 0 ? 1 : 0;
+}
diff --git a/clang-tools-extra/clangd/test/CMakeLists.txt b/clang-tools-extra/clangd/test/CMakeLists.txt index eef8f529667f7..608835cab2fc5 100644 --- a/clang-tools-extra/clangd/test/CMakeLists.txt +++ b/clang-tools-extra/clangd/test/CMakeLists.txt @@ -2,6 +2,7 @@ set(CLANGD_TEST_DEPS clangd ClangdTests clangd-indexer + clangd-remap split-file ) diff --git a/clang-tools-extra/clangd/test/remap.test b/clang-tools-extra/clangd/test/remap.test new file mode 100644 index 0000000000000..0b5985ac681eb --- /dev/null +++ b/clang-tools-extra/clangd/test/remap.test @@ -0,0 +1,18 @@ +# Paths are not constructed correctly for the test to run on Windows +# UNSUPPORTED: system-windows +# +# RUN: rm -rf %t && mkdir -p %t/src %t/index +# RUN: echo 'void hello();' > %t/src/test.h +# RUN: printf '#include "test.h"\nvoid hello() {}\n' > %t/src/test.cpp +# RUN: clangd-indexer %t/src/test.cpp -- -I%t/src > %t/index/test.idx +# +# Remap %t/src -> /remapped/path inside the shard +# RUN: clangd-remap --path-mappings '%t/src=/remapped/path' %t/index 2>&1 \ +# RUN: | FileCheck -check-prefix=REMAP %s +# REMAP: Found 1 shard(s) to process +# REMAP: Processed: 1 shard(s), 0 renamed, 1 unchanged, 0 error(s) +# +# Verify the remapped path appears in the symbol data +# RUN: dexp %t/index/test.idx -c="lookup -name hello" \ +# RUN: | FileCheck -check-prefix=LOOKUP %s +# LOOKUP: file:///remapped/path/test.h _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
