https://github.com/justincady updated 
https://github.com/llvm/llvm-project/pull/185482

>From 1139aa7765e2ce9247f86516e17f317ecba419ce Mon Sep 17 00:00:00 2001
From: Justin Cady <[email protected]>
Date: Mon, 9 Mar 2026 14:08:36 -0400
Subject: [PATCH 1/2] [clangd] Add background index path remapping tool

Introduce a standalone tool to remap paths inside clangd background
index files. This allows users to pay the cost of generating a full
background index once, then reuse the background index across multiple
clients. Each client rewrites the background index in place after
copying the original, specifying the path(s) to be remapped.

This is an alternative approach to the dynamic path remapping proposed
in #180285.

Fixes clangd/clangd#847

Assisted-by: claude
---
 clang-tools-extra/clangd/CMakeLists.txt       |   1 +
 clang-tools-extra/clangd/remap/CMakeLists.txt |  17 +
 clang-tools-extra/clangd/remap/RemapMain.cpp  | 340 ++++++++++++++++++
 clang-tools-extra/clangd/test/CMakeLists.txt  |   1 +
 clang-tools-extra/clangd/test/remap.test      |  19 +
 5 files changed, 378 insertions(+)
 create mode 100644 clang-tools-extra/clangd/remap/CMakeLists.txt
 create mode 100644 clang-tools-extra/clangd/remap/RemapMain.cpp
 create mode 100644 clang-tools-extra/clangd/test/remap.test

diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index d7ec853af862f..5ee06c797f9cb 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -200,6 +200,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
 endif()
 add_subdirectory(tool)
 add_subdirectory(indexer)
+add_subdirectory(remap)
 
 if (LLVM_INCLUDE_BENCHMARKS)
   add_subdirectory(benchmarks)
diff --git a/clang-tools-extra/clangd/remap/CMakeLists.txt b/clang-tools-extra/clangd/remap/CMakeLists.txt
new file mode 100644
index 0000000000000..ff8adb0d72f64
--- /dev/null
+++ b/clang-tools-extra/clangd/remap/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(LLVM_LINK_COMPONENTS
+    Support
+    )
+
+add_clang_executable(clangd-remap
+  RemapMain.cpp
+  )
+
+clang_target_link_libraries(clangd-remap
+  PRIVATE
+  clangBasic
+  )
+target_link_libraries(clangd-remap
+  PRIVATE
+  clangDaemon
+  clangdSupport
+  )
diff --git a/clang-tools-extra/clangd/remap/RemapMain.cpp b/clang-tools-extra/clangd/remap/RemapMain.cpp
new file mode 100644
index 0000000000000..d5564334e48da
--- /dev/null
+++ b/clang-tools-extra/clangd/remap/RemapMain.cpp
@@ -0,0 +1,340 @@
+//===--- RemapMain.cpp - Remap paths in background index shards -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// clangd-remap is a standalone tool that rewrites paths inside every .idx shard
+// in a background index directory. An index generated on one machine (or at one
+// workspace path) can be remapped and reused within a source tree at a
+// different location.
+//
+// Usage:
+//   clangd-remap --path-mappings=/old/root=/new/root /path/to/index-dir
+//
+//===----------------------------------------------------------------------===//
+
+#include "Headers.h"
+#include "PathMapping.h"
+#include "SourceCode.h"
+#include "URI.h"
+#include "index/Ref.h"
+#include "index/Serialization.h"
+#include "index/Symbol.h"
+#include "support/Logger.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+#include <atomic>
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+static llvm::cl::OptionCategory RemapCategory("clangd-remap options");
+
+static llvm::cl::opt<std::string> PathMappingsArg{
+    "path-mappings",
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::desc(
+        "List of path mappings applied to every string in each background "
+        "index shard. Format: /old/path=/new/path[,/old2=/new2,...]"),
+    llvm::cl::Required,
+};
+
+static llvm::cl::opt<std::string> IndexDir{
+    llvm::cl::desc("<index-dir>"),
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::Positional,
+    llvm::cl::Required,
+};
+
+static llvm::cl::opt<unsigned> NumThreads{
+    "j",
+    llvm::cl::cat(RemapCategory),
+    llvm::cl::desc("Number of worker threads (0 = all)"),
+    llvm::cl::init(0),
+};
+
+// Apply a path mapping to a URI or raw path string. file:// URIs are handed
+// to doPathMapping(); other strings yield std::nullopt when nothing matches.
+// Ex. given "-I/old/root/include" and mapping /old/root=/new/root, the result
+// is "-I/new/root/include"
+std::optional<std::string> remapString(llvm::StringRef S,
+                                       const PathMappings &Mappings) {
+  // Client = old path, Server = new path; ClientToServer maps old -> new
+  if (S.starts_with("file://"))
+    return doPathMapping(S, PathMapping::Direction::ClientToServer, Mappings);
+
+  // For non-URI strings (compilation flags, directory paths, etc.) only match
+  // at the first '/' (where an absolute path begins)
+  size_t FirstSlash = S.find('/');
+  if (FirstSlash == llvm::StringRef::npos)
+    return std::nullopt;
+
+  for (const auto &Mapping : Mappings) {
+    size_t Pos = S.find(Mapping.ClientPath);
+    if (Pos == FirstSlash) {
+      llvm::StringRef After = S.substr(Pos + Mapping.ClientPath.size());
+      // Ensure a full path-component match: "/old" must not match "/older"
+      if (After.empty() || After.front() == '/')
+        return (S.substr(0, Pos) + Mapping.ServerPath + After).str();
+    }
+  }
+  return std::nullopt;
+}
+
+// Remap a StringRef in place: on a match the new string is saved into Saver so
+// the reference stays valid; S is left untouched when no mapping applies
+void remapRef(llvm::StringRef &S, const PathMappings &Mappings,
+              llvm::StringSaver &Saver) {
+  if (auto R = remapString(S, Mappings))
+    S = Saver.save(std::move(*R));
+}
+
+// Like remapRef, but _always_ saves into Saver (even on no match). Used for
+// StringRefs that will outlive their original storage.
+void remapOrCopyRef(llvm::StringRef &S, const PathMappings &Mappings,
+                    llvm::StringSaver &Saver) {
+  if (auto R = remapString(S, Mappings))
+    S = Saver.save(std::move(*R));
+  else
+    S = Saver.save(S);
+}
+
+void remapCharURI(const char *&P, const PathMappings &Mappings,
+                  llvm::StringSaver &Saver) {
+  llvm::StringRef S(P);
+  if (auto R = remapString(S, Mappings))
+    P = Saver.save(std::move(*R)).data();
+}
+
+void remapStdStr(std::string &S, const PathMappings &Mappings) {
+  if (auto R = remapString(S, Mappings))
+    S = std::move(*R);
+}
+
+std::vector<std::string> collectShards(llvm::StringRef Dir) {
+  std::vector<std::string> Paths;
+  std::error_code EC;
+  for (llvm::sys::fs::recursive_directory_iterator It(Dir, EC), End;
+       It != End && !EC; It.increment(EC)) {
+    if (llvm::sys::path::extension(It->path()) == ".idx")
+      Paths.push_back(It->path());
+  }
+  if (EC)
+    elog("Error scanning directory {0}: {1}", Dir, EC.message());
+  return Paths;
+}
+
+// Compute shard filename for a source path. (See getShardPathFromFilePath()
+// in BackgroundIndexStorage.cpp.)
+std::string shardName(llvm::StringRef SourceFilePath) {
+  return (llvm::sys::path::filename(SourceFilePath) + "." +
+          llvm::toHex(digest(SourceFilePath)) + ".idx")
+      .str();
+}
+
+// Derive the new shard filename from the remapped Sources (IncludeGraph)
+// section. Each source entry's URI was already remapped, so resolving it
+// gives the new absolute path whose hash determines the shard filename.
+// Shard filenames have the form "<source-filename>.<hash>.idx"; we strip
+// ".<hash>.idx" to recover the source filename and match it against the
+// entries in Sources (a lone resolvable entry always matches). Returns
+// OldFilename unchanged when Sources is absent/empty or nothing matches.
+std::string deriveNewFilename(const IndexFileIn &Data,
+                              llvm::StringRef OldFilename) {
+  if (!Data.Sources || Data.Sources->empty())
+    return OldFilename.str();
+
+  // ".<16-hex-hash>.idx" is 1 + 16 + 4 = 21 characters.
+  constexpr size_t ShardSuffixLen = 21;
+  llvm::StringRef SourceBasename = OldFilename;
+  if (SourceBasename.ends_with(".idx") &&
+      SourceBasename.size() > ShardSuffixLen)
+    SourceBasename = SourceBasename.drop_back(ShardSuffixLen);
+
+  for (const auto &Entry : *Data.Sources) {
+    auto U = URI::parse(Entry.first());
+    if (!U) {
+      llvm::consumeError(U.takeError());
+      continue;
+    }
+    auto Path = URI::resolve(*U);
+    if (!Path) {
+      llvm::consumeError(Path.takeError());
+      continue;
+    }
+    if (llvm::sys::path::filename(*Path) == SourceBasename ||
+        Data.Sources->size() == 1)
+      return shardName(*Path);
+  }
+  return OldFilename.str();
+}
+
+// Remap all paths inside a parsed IndexFileIn in-place. Saver is used to
+// allocate new strings for fields stored as StringRef or raw pointers.
+void remapIndexData(IndexFileIn &Data, const PathMappings &Mappings,
+                    llvm::StringSaver &Saver) {
+  if (Data.Symbols) {
+    // SymbolSlab is immutable, so we rebuild it
+    SymbolSlab::Builder Builder;
+    for (const auto &Sym : *Data.Symbols) {
+      Symbol S = Sym;
+      remapCharURI(S.CanonicalDeclaration.FileURI, Mappings, Saver);
+      remapCharURI(S.Definition.FileURI, Mappings, Saver);
+      for (auto &Inc : S.IncludeHeaders)
+        remapRef(Inc.IncludeHeader, Mappings, Saver);
+      Builder.insert(S);
+    }
+    Data.Symbols = std::move(Builder).build();
+  }
+
+  if (Data.Refs) {
+    RefSlab::Builder Builder;
+    for (const auto &Entry : *Data.Refs) {
+      for (const auto &R : Entry.second) {
+        Ref MR = R; // mutable copy
+        remapCharURI(MR.Location.FileURI, Mappings, Saver);
+        Builder.insert(Entry.first, MR);
+      }
+    }
+    Data.Refs = std::move(Builder).build();
+  }
+
+  // We must rebuild the StringMap because keys may change.  All StringRef
+  // fields (URI, DirectIncludes) are saved into Saver because the old
+  // StringMap is destroyed below.
+  if (Data.Sources) {
+    IncludeGraph NewSources;
+    for (auto &Entry : *Data.Sources) {
+      IncludeGraphNode IGN = Entry.getValue();
+      remapOrCopyRef(IGN.URI, Mappings, Saver);
+      for (auto &Inc : IGN.DirectIncludes)
+        remapOrCopyRef(Inc, Mappings, Saver);
+      NewSources[IGN.URI] = std::move(IGN);
+    }
+    Data.Sources = std::move(NewSources);
+  }
+
+  if (Data.Cmd) {
+    remapStdStr(Data.Cmd->Directory, Mappings);
+    for (auto &Arg : Data.Cmd->CommandLine)
+      remapStdStr(Arg, Mappings);
+    remapStdStr(Data.Cmd->Filename, Mappings);
+  }
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
+
+int main(int Argc, const char **Argv) {
+  using namespace clang::clangd;
+
+  llvm::sys::PrintStackTraceOnErrorSignal(Argv[0]);
+  llvm::cl::HideUnrelatedOptions(RemapCategory);
+  llvm::cl::ParseCommandLineOptions(Argc, Argv,
+                                    "clangd-remap: rewrite paths inside "
+                                    "background-index .idx shards\n");
+
+  auto Mappings = parsePathMappings(PathMappingsArg);
+  if (!Mappings) {
+    elog("Invalid --path-mappings: {0}", Mappings.takeError());
+    return 1;
+  }
+  if (Mappings->empty()) {
+    elog("No path mappings specified.");
+    return 1;
+  }
+
+  // Gather all shard files from the index directory.
+  auto AllShards = collectShards(IndexDir);
+  if (AllShards.empty()) {
+    llvm::errs() << "No .idx files found in the specified directories.\n";
+    return 0;
+  }
+
+  llvm::errs() << "Found " << AllShards.size() << " shard(s) to process.\n";
+  for (const auto &M : *Mappings)
+    llvm::errs() << "  Path mapping: " << M << "\n";
+
+  if (NumThreads.getValue() != 0)
+    llvm::parallel::strategy = llvm::hardware_concurrency(NumThreads);
+
+  std::atomic<unsigned> Errors{0};
+  std::atomic<unsigned> FilesRenamed{0};
+  std::atomic<unsigned> FilesUnchanged{0};
+
+  llvm::parallelFor(0, AllShards.size(), [&](size_t I) {
+    const std::string &ShardPath = AllShards[I];
+
+    auto Buf = llvm::MemoryBuffer::getFile(ShardPath);
+    if (!Buf) {
+      elog("Cannot read {0}: {1}", ShardPath, Buf.getError().message());
+      ++Errors;
+      return;
+    }
+
+    auto Parsed = readIndexFile((*Buf)->getBuffer(), SymbolOrigin::Background);
+    if (!Parsed) {
+      elog("Cannot parse {0}: {1}", ShardPath, Parsed.takeError());
+      ++Errors;
+      return;
+    }
+
+    // Remap all paths in the parsed data
+    llvm::BumpPtrAllocator Arena;
+    llvm::StringSaver Saver(Arena);
+    remapIndexData(*Parsed, *Mappings, Saver);
+
+    // Serialize back to RIFF
+    IndexFileOut Out(*Parsed);
+    Out.Format = IndexFileFormat::RIFF;
+    std::string Serialized = llvm::to_string(Out);
+
+    // Determine whether the shard filename needs to change
+    llvm::StringRef OldFilename = llvm::sys::path::filename(ShardPath);
+    llvm::StringRef ParentDir = llvm::sys::path::parent_path(ShardPath);
+    std::string NewFilename = deriveNewFilename(*Parsed, OldFilename);
+
+    // Write the remapped shard (possibly under a new name)
+    llvm::SmallString<256> NewPath(ParentDir);
+    llvm::sys::path::append(NewPath, NewFilename);
+    auto Err = llvm::writeToOutput(NewPath, [&](llvm::raw_ostream &OS) {
+      OS << Serialized;
+      return llvm::Error::success();
+    });
+    if (Err) {
+      elog("Cannot write {0}: {1}", NewPath, std::move(Err));
+      ++Errors;
+      return;
+    }
+
+    // If the filename changed, remove the old shard
+    if (NewFilename != OldFilename) {
+      llvm::sys::fs::remove(ShardPath);
+      ++FilesRenamed;
+    } else {
+      ++FilesUnchanged;
+    }
+  });
+
+  unsigned Renamed = FilesRenamed.load();
+  unsigned Unchanged = FilesUnchanged.load();
+  llvm::errs() << "Processed: " << (Renamed + Unchanged) << " shard(s), "
+               << Renamed << " renamed, " << Unchanged << " unchanged, "
+               << Errors.load() << " error(s).\n";
+  return Errors.load() > 0 ? 1 : 0;
+}
diff --git a/clang-tools-extra/clangd/test/CMakeLists.txt b/clang-tools-extra/clangd/test/CMakeLists.txt
index eef8f529667f7..608835cab2fc5 100644
--- a/clang-tools-extra/clangd/test/CMakeLists.txt
+++ b/clang-tools-extra/clangd/test/CMakeLists.txt
@@ -2,6 +2,7 @@ set(CLANGD_TEST_DEPS
   clangd
   ClangdTests
   clangd-indexer
+  clangd-remap
   split-file
   )
 
diff --git a/clang-tools-extra/clangd/test/remap.test b/clang-tools-extra/clangd/test/remap.test
new file mode 100644
index 0000000000000..52337d7832cf9
--- /dev/null
+++ b/clang-tools-extra/clangd/test/remap.test
@@ -0,0 +1,19 @@
+# Paths are not constructed correctly for the test to run on Windows
+# UNSUPPORTED: system-windows
+# REQUIRES: zlib
+#
+# RUN: rm -rf %t && mkdir -p %t/src %t/index
+# RUN: echo 'void hello();' > %t/src/test.h
+# RUN: printf '#include "test.h"\nvoid hello() {}\n' > %t/src/test.cpp
+# RUN: clangd-indexer %t/src/test.cpp -- -I%t/src > %t/index/test.idx
+#
+# Remap %t/src -> /remapped/path inside the shard
+# RUN: clangd-remap --path-mappings '%t/src=/remapped/path' %t/index 2>&1 \
+# RUN:   | FileCheck -check-prefix=REMAP %s
+# REMAP: Found 1 shard(s) to process
+# REMAP: Processed: 1 shard(s), 0 renamed, 1 unchanged, 0 error(s)
+#
+# Verify the remapped path appears in the symbol data
+# RUN: dexp %t/index/test.idx -c="lookup -name hello" \
+# RUN:   | FileCheck -check-prefix=LOOKUP %s
+# LOOKUP: file:///remapped/path/test.h

>From 45164af91e457116a5afbf2ef3fe2d2a38f6881a Mon Sep 17 00:00:00 2001
From: Justin Cady <[email protected]>
Date: Thu, 12 Mar 2026 08:59:16 -0400
Subject: [PATCH 2/2] Fix issues noted in review comments

- Add an explicit FIXME for Windows paths
- Use consistent logging mechanism
- Remove unnecessary zlib test requirement
---
 clang-tools-extra/clangd/remap/RemapMain.cpp | 12 ++++++------
 clang-tools-extra/clangd/test/remap.test     |  1 -
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/clangd/remap/RemapMain.cpp b/clang-tools-extra/clangd/remap/RemapMain.cpp
index d5564334e48da..9f8e31c81856f 100644
--- a/clang-tools-extra/clangd/remap/RemapMain.cpp
+++ b/clang-tools-extra/clangd/remap/RemapMain.cpp
@@ -79,6 +79,7 @@ std::optional<std::string> remapString(llvm::StringRef S,
 
   // For non-URI strings (compilation flags, directory paths, etc.) only match
   // at the first '/' (where an absolute path begins)
+  // FIXME: This does not handle Windows paths; only POSIX paths are supported.
   size_t FirstSlash = S.find('/');
   if (FirstSlash == llvm::StringRef::npos)
     return std::nullopt;
@@ -262,13 +263,13 @@ int main(int Argc, const char **Argv) {
   // Gather all shard files from the index directory.
   auto AllShards = collectShards(IndexDir);
   if (AllShards.empty()) {
-    llvm::errs() << "No .idx files found in the specified directories.\n";
+    log("No .idx files found in the specified directories.");
     return 0;
   }
 
-  llvm::errs() << "Found " << AllShards.size() << " shard(s) to process.\n";
+  log("Found {0} shard(s) to process.", AllShards.size());
   for (const auto &M : *Mappings)
-    llvm::errs() << "  Path mapping: " << M << "\n";
+    log("  Path mapping: {0}", M);
 
   if (NumThreads.getValue() != 0)
     llvm::parallel::strategy = llvm::hardware_concurrency(NumThreads);
@@ -333,8 +334,7 @@ int main(int Argc, const char **Argv) {
 
   unsigned Renamed = FilesRenamed.load();
   unsigned Unchanged = FilesUnchanged.load();
-  llvm::errs() << "Processed: " << (Renamed + Unchanged) << " shard(s), "
-               << Renamed << " renamed, " << Unchanged << " unchanged, "
-               << Errors.load() << " error(s).\n";
+  log("Processed: {0} shard(s), {1} renamed, {2} unchanged, {3} error(s).",
+      Renamed + Unchanged, Renamed, Unchanged, Errors.load());
   return Errors.load() > 0 ? 1 : 0;
 }
diff --git a/clang-tools-extra/clangd/test/remap.test b/clang-tools-extra/clangd/test/remap.test
index 52337d7832cf9..0b5985ac681eb 100644
--- a/clang-tools-extra/clangd/test/remap.test
+++ b/clang-tools-extra/clangd/test/remap.test
@@ -1,6 +1,5 @@
 # Paths are not constructed correctly for the test to run on Windows
 # UNSUPPORTED: system-windows
-# REQUIRES: zlib
 #
 # RUN: rm -rf %t && mkdir -p %t/src %t/index
 # RUN: echo 'void hello();' > %t/src/test.h

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to