https://github.com/jasonwilliams updated 
https://github.com/llvm/llvm-project/pull/175209

>From 7c9f53c6babb10490fa8b0d92ca37ccebc6f536e Mon Sep 17 00:00:00 2001
From: jasonwilliams <[email protected]>
Date: Fri, 9 Jan 2026 17:23:51 +0000
Subject: [PATCH] [clangd] Add background index format support to
 clangd-indexer

Add support for generating background index shards directly from
clangd-indexer, enabling offline pre-indexing of projects for use with
clangd's background index.
---
 .../clangd/index/Serialization.h              |   5 +-
 .../clangd/indexer/IndexerMain.cpp            | 205 ++++++++++++++++--
 2 files changed, 191 insertions(+), 19 deletions(-)

diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index bf8e036afcb6c..1553e702a5881 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -35,8 +35,9 @@ namespace clang {
 namespace clangd {
 
 enum class IndexFileFormat {
-  RIFF, // Versioned binary format, suitable for production use.
-  YAML, // Human-readable format, suitable for experiments and debugging.
+  RIFF,      // Versioned binary format, suitable for production use.
+  YAML,      // Human-readable format, suitable for experiments and debugging.
+  BACKGROUND // Background index format, suitable for language server use.
 };
 
 // Holds the contents of an index file that was read.
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index bc5d1a7408991..5c9e540fee0ba 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -12,6 +12,8 @@
 
 #include "CompileCommands.h"
 #include "Compiler.h"
+#include "GlobalCompilationDatabase.h"
+#include "index/Background.h"
 #include "index/IndexAction.h"
 #include "index/Merge.h"
 #include "index/Ref.h"
@@ -30,13 +32,14 @@ namespace clang {
 namespace clangd {
 namespace {
 
-static llvm::cl::opt<IndexFileFormat>
-    Format("format", llvm::cl::desc("Format of the index to be written"),
-           llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
-                                       "human-readable YAML format"),
-                            clEnumValN(IndexFileFormat::RIFF, "binary",
-                                       "binary RIFF format")),
-           llvm::cl::init(IndexFileFormat::RIFF));
+static llvm::cl::opt<IndexFileFormat> Format(
+    "format", llvm::cl::desc("Format of the index to be written"),
+    llvm::cl::values(
+        clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
+        clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"),
+        clEnumValN(IndexFileFormat::BACKGROUND, "background",
+                   "background index format for language servers")),
+    llvm::cl::init(IndexFileFormat::RIFF));
 
 static llvm::cl::list<std::string> QueryDriverGlobs{
     "query-driver",
@@ -48,6 +51,16 @@ static llvm::cl::list<std::string> QueryDriverGlobs{
     llvm::cl::CommaSeparated,
 };
 
+static llvm::cl::opt<std::string> ProjectRoot{
+    "project-root",
+    llvm::cl::desc(
+        "Path to the project root for --format=background. "
+        "Determines where to store index shards. Shards are stored in "
+        "<project-root>/.cache/clangd/index/. "
+        "Defaults to current directory if not specified."),
+};
+
+// Action factory that merges all symbols into a single index (for YAML/RIFF).
 class IndexActionFactory : public tooling::FrontendActionFactory {
 public:
   IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
@@ -123,6 +136,117 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
   RelationSlab::Builder Relations;
 };
 
+// Action factory that writes per-file shards (for background index format).
+class BackgroundIndexActionFactory : public tooling::FrontendActionFactory {
+public:
+  BackgroundIndexActionFactory(BackgroundIndexStorage &Storage)
+      : Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()),
+        Refs(std::make_unique<RefSlab::Builder>()),
+        Relations(std::make_unique<RelationSlab::Builder>()) {}
+
+  std::unique_ptr<FrontendAction> create() override {
+    SymbolCollector::Options Opts;
+    Opts.CountReferences = true;
+    Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
+      const auto F = SM.getFileEntryRefForID(FID);
+      if (!F)
+        return false;
+      auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
+      if (!AbsPath)
+        return false;
+      std::lock_guard<std::mutex> Lock(FilesMu);
+      return Files.insert(*AbsPath).second;
+    };
+    return createStaticIndexingAction(
+        Opts,
+        [&](SymbolSlab S) {
+          std::lock_guard<std::mutex> Lock(SymbolsMu);
+          for (const auto &Sym : S) {
+            if (const auto *Existing = Symbols->find(Sym.ID))
+              Symbols->insert(mergeSymbol(*Existing, Sym));
+            else
+              Symbols->insert(Sym);
+          }
+        },
+        [&](RefSlab S) {
+          std::lock_guard<std::mutex> Lock(RefsMu);
+          for (const auto &Sym : S) {
+            for (const auto &Ref : Sym.second)
+              Refs->insert(Sym.first, Ref);
+          }
+        },
+        [&](RelationSlab S) {
+          std::lock_guard<std::mutex> Lock(RelsMu);
+          for (const auto &R : S)
+            Relations->insert(R);
+        },
+        /*IncludeGraphCallback=*/nullptr);
+  }
+
+  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+                     FileManager *Files,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+                     DiagnosticConsumer *DiagConsumer) override {
+    disableUnsupportedOptions(*Invocation);
+
+    // Get the main file path before running.
+    std::string MainFile;
+    if (!Invocation->getFrontendOpts().Inputs.empty())
+      MainFile = Invocation->getFrontendOpts().Inputs[0].getFile().str();
+
+    bool Success = tooling::FrontendActionFactory::runInvocation(
+        std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
+
+    // After processing, write a shard for this file.
+    if (Success && !MainFile.empty())
+      writeShardForFile(MainFile);
+
+    return Success;
+  }
+
+private:
+  void writeShardForFile(llvm::StringRef MainFile) {
+    IndexFileIn Data;
+    {
+      std::lock_guard<std::mutex> Lock(SymbolsMu);
+      Data.Symbols = std::move(*Symbols).build();
+      Symbols = std::make_unique<SymbolSlab::Builder>();
+    }
+    {
+      std::lock_guard<std::mutex> Lock(RefsMu);
+      Data.Refs = std::move(*Refs).build();
+      Refs = std::make_unique<RefSlab::Builder>();
+    }
+    {
+      std::lock_guard<std::mutex> Lock(RelsMu);
+      Data.Relations = std::move(*Relations).build();
+      Relations = std::make_unique<RelationSlab::Builder>();
+    }
+
+    IndexFileOut Out(Data);
+    Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format.
+
+    if (auto Err = Storage.storeShard(MainFile, Out)) {
+      elog("Failed to write shard for {0}: {1}", MainFile, std::move(Err));
+    } else {
+      std::lock_guard<std::mutex> Lock(FilesMu);
+      ++ShardsWritten;
+      log("Wrote shard for {0} ({1} total)", MainFile, ShardsWritten);
+    }
+  }
+
+  BackgroundIndexStorage &Storage;
+  std::mutex FilesMu;
+  llvm::StringSet<> Files;
+  unsigned ShardsWritten = 0;
+  std::mutex SymbolsMu;
+  std::unique_ptr<SymbolSlab::Builder> Symbols;
+  std::mutex RefsMu;
+  std::unique_ptr<RefSlab::Builder> Refs;
+  std::mutex RelsMu;
+  std::unique_ptr<RelationSlab::Builder> Relations;
+};
+
 } // namespace
 } // namespace clangd
 } // namespace clang
@@ -141,6 +265,13 @@ int main(int argc, const char **argv) {
 
   $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
 
+  Example usage for background index format (writes shards to disk):
+
+  $ clangd-indexer --format=background --executor=all-TUs build/
+
+  This writes index shards to .cache/clangd/index/ in the current directory.
+  Use --project-root to specify a different location for the shards.
+
   Note: only symbols from header files will be indexed.
   )";
 
@@ -152,23 +283,63 @@ int main(int argc, const char **argv) {
     return 1;
   }
 
-  // Collect symbols found in each translation unit, merging as we go.
-  clang::clangd::IndexFileIn Data;
   auto Mangler = std::make_shared<clang::clangd::CommandMangler>(
       clang::clangd::CommandMangler::detect());
   Mangler->SystemIncludeExtractor = clang::clangd::getSystemIncludeExtractor(
       static_cast<llvm::ArrayRef<std::string>>(
           clang::clangd::QueryDriverGlobs));
+
+  auto Adjuster = clang::tooling::ArgumentsAdjuster(
+      [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
+                                     llvm::StringRef File) {
+        clang::tooling::CompileCommand Cmd;
+        Cmd.CommandLine = Args;
+        Mangler->operator()(Cmd, File);
+        return Cmd.CommandLine;
+      });
+
+  // Handle background index format separately - writes per-file shards.
+  if (clang::clangd::Format == clang::clangd::IndexFileFormat::BACKGROUND) {
+    // Default to current directory if --project-root not specified.
+    std::string Root = clang::clangd::ProjectRoot;
+    if (Root.empty()) {
+      llvm::SmallString<256> CurrentDir;
+      if (auto EC = llvm::sys::fs::current_path(CurrentDir)) {
+        llvm::errs() << "Error: Failed to get current directory: "
+                     << EC.message() << "\n";
+        return 1;
+      }
+      Root = std::string(CurrentDir);
+    }
+
+    // Create storage factory for disk-backed index shards.
+    auto IndexStorageFactory =
+        clang::clangd::BackgroundIndexStorage::createDiskBackedStorageFactory(
+            [Root](clang::clangd::PathRef) {
+              return clang::clangd::ProjectInfo{Root};
+            });
+
+    // Get storage for the project root.
+    clang::clangd::BackgroundIndexStorage *Storage = IndexStorageFactory(Root);
+
+    auto Err = Executor->get()->execute(
+        std::make_unique<clang::clangd::BackgroundIndexActionFactory>(*Storage),
+        std::move(Adjuster));
+    if (Err) {
+      clang::clangd::elog("{0}", std::move(Err));
+      return 1;
+    }
+
+    llvm::errs() << "Background index shards written to " << Root
+                 << "/.cache/clangd/index/\n";
+    return 0;
+  }
+
+  // Standard mode: collect and merge symbols, then emit to stdout.
+  clang::clangd::IndexFileIn Data;
   auto Err = Executor->get()->execute(
       std::make_unique<clang::clangd::IndexActionFactory>(Data),
-      clang::tooling::ArgumentsAdjuster(
-          [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
-                                         llvm::StringRef File) {
-            clang::tooling::CompileCommand Cmd;
-            Cmd.CommandLine = Args;
-            Mangler->operator()(Cmd, File);
-            return Cmd.CommandLine;
-          }));
+      std::move(Adjuster));
   if (Err) {
     clang::clangd::elog("{0}", std::move(Err));
   }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to