https://github.com/jasonwilliams updated https://github.com/llvm/llvm-project/pull/175209
>From e3cd63dda425446864c61e565194d13411b5571f Mon Sep 17 00:00:00 2001 From: jasonwilliams <[email protected]> Date: Fri, 9 Jan 2026 17:23:51 +0000 Subject: [PATCH 1/2] [clangd] Add background index format support to clangd-indexer Add support for generating background index shards directly from clangd-indexer, enabling offline pre-indexing of projects for use with clangd's background index. --- .../clangd/index/Serialization.h | 5 +- .../clangd/indexer/IndexerMain.cpp | 205 ++++++++++++++++-- 2 files changed, 191 insertions(+), 19 deletions(-) diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index bf8e036afcb6c..1553e702a5881 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -35,8 +35,9 @@ namespace clang { namespace clangd { enum class IndexFileFormat { - RIFF, // Versioned binary format, suitable for production use. - YAML, // Human-readable format, suitable for experiments and debugging. + RIFF, // Versioned binary format, suitable for production use. + YAML, // Human-readable format, suitable for experiments and debugging. + BACKGROUND // Background index format, suitable for language server use. }; // Holds the contents of an index file that was read. diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp index bc5d1a7408991..5c9e540fee0ba 100644 --- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp +++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp @@ -12,6 +12,8 @@ #include "CompileCommands.h" #include "Compiler.h" +#include "GlobalCompilationDatabase.h" +#include "index/Background.h" #include "index/IndexAction.h" #include "index/Merge.h" #include "index/Ref.h" @@ -30,13 +32,14 @@ namespace clang { namespace clangd { namespace { -static llvm::cl::opt<IndexFileFormat> - Format("format", llvm::cl::desc("Format of the index to be written"), - llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml", - "human-readable YAML format"), - clEnumValN(IndexFileFormat::RIFF, "binary", - "binary RIFF format")), - llvm::cl::init(IndexFileFormat::RIFF)); +static llvm::cl::opt<IndexFileFormat> Format( + "format", llvm::cl::desc("Format of the index to be written"), + llvm::cl::values( + clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"), + clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"), + clEnumValN(IndexFileFormat::BACKGROUND, "background", + "background index format for language servers")), + llvm::cl::init(IndexFileFormat::RIFF)); static llvm::cl::list<std::string> QueryDriverGlobs{ "query-driver", @@ -48,6 +51,16 @@ static llvm::cl::list<std::string> QueryDriverGlobs{ llvm::cl::CommaSeparated, }; +static llvm::cl::opt<std::string> ProjectRoot{ + "project-root", + llvm::cl::desc( + "Path to the project root for --format=background. " + "Determines where to store index shards. Shards are stored in " + "<project-root>/.cache/clangd/index/. " + "Defaults to current directory if not specified."), +}; + +// Action factory that merges all symbols into a single index (for YAML/RIFF). class IndexActionFactory : public tooling::FrontendActionFactory { public: IndexActionFactory(IndexFileIn &Result) : Result(Result) {} @@ -123,6 +136,117 @@ class IndexActionFactory : public tooling::FrontendActionFactory { RelationSlab::Builder Relations; }; +// Action factory that writes per-file shards (for background index format). +class BackgroundIndexActionFactory : public tooling::FrontendActionFactory { +public: + BackgroundIndexActionFactory(BackgroundIndexStorage &Storage) + : Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()), + Refs(std::make_unique<RefSlab::Builder>()), + Relations(std::make_unique<RelationSlab::Builder>()) {} + + std::unique_ptr<FrontendAction> create() override { + SymbolCollector::Options Opts; + Opts.CountReferences = true; + Opts.FileFilter = [&](const SourceManager &SM, FileID FID) { + const auto F = SM.getFileEntryRefForID(FID); + if (!F) + return false; + auto AbsPath = getCanonicalPath(*F, SM.getFileManager()); + if (!AbsPath) + return false; + std::lock_guard<std::mutex> Lock(FilesMu); + return Files.insert(*AbsPath).second; + }; + return createStaticIndexingAction( + Opts, + [&](SymbolSlab S) { + std::lock_guard<std::mutex> Lock(SymbolsMu); + for (const auto &Sym : S) { + if (const auto *Existing = Symbols->find(Sym.ID)) + Symbols->insert(mergeSymbol(*Existing, Sym)); + else + Symbols->insert(Sym); + } + }, + [&](RefSlab S) { + std::lock_guard<std::mutex> Lock(RefsMu); + for (const auto &Sym : S) { + for (const auto &Ref : Sym.second) + Refs->insert(Sym.first, Ref); + } + }, + [&](RelationSlab S) { + std::lock_guard<std::mutex> Lock(RelsMu); + for (const auto &R : S) + Relations->insert(R); + }, + /*IncludeGraphCallback=*/nullptr); + } + + bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation, + FileManager *Files, + std::shared_ptr<PCHContainerOperations> PCHContainerOps, + DiagnosticConsumer *DiagConsumer) override { + disableUnsupportedOptions(*Invocation); + + // Get the main file path before running. + std::string MainFile; + if (!Invocation->getFrontendOpts().Inputs.empty()) + MainFile = Invocation->getFrontendOpts().Inputs[0].getFile().str(); + + bool Success = tooling::FrontendActionFactory::runInvocation( + std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer); + + // After processing, write a shard for this file. + if (Success && !MainFile.empty()) + writeShardForFile(MainFile); + + return Success; + } + +private: + void writeShardForFile(llvm::StringRef MainFile) { + IndexFileIn Data; + { + std::lock_guard<std::mutex> Lock(SymbolsMu); + Data.Symbols = std::move(*Symbols).build(); + Symbols = std::make_unique<SymbolSlab::Builder>(); + } + { + std::lock_guard<std::mutex> Lock(RefsMu); + Data.Refs = std::move(*Refs).build(); + Refs = std::make_unique<RefSlab::Builder>(); + } + { + std::lock_guard<std::mutex> Lock(RelsMu); + Data.Relations = std::move(*Relations).build(); + Relations = std::make_unique<RelationSlab::Builder>(); + } + + IndexFileOut Out(Data); + Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format. + + if (auto Err = Storage.storeShard(MainFile, Out)) { + elog("Failed to write shard for {0}: {1}", MainFile, std::move(Err)); + } else { + std::lock_guard<std::mutex> Lock(FilesMu); + ++ShardsWritten; + log("Wrote shard for {0} ({1} total)", MainFile, ShardsWritten); + } + } + + BackgroundIndexStorage &Storage; + std::mutex FilesMu; + llvm::StringSet<> Files; + unsigned ShardsWritten = 0; + std::mutex SymbolsMu; + std::unique_ptr<SymbolSlab::Builder> Symbols; + std::mutex RefsMu; + std::unique_ptr<RefSlab::Builder> Refs; + std::mutex RelsMu; + std::unique_ptr<RelationSlab::Builder> Relations; +}; + } // namespace } // namespace clangd } // namespace clang @@ -141,6 +265,13 @@ int main(int argc, const char **argv) { $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex + Example usage for background index format (writes shards to disk): + + $ clangd-indexer --format=background --executor=all-TUs build/ + + This writes index shards to .cache/clangd/index/ in the current directory. + Use --project-root to specify a different location for the shards. + Note: only symbols from header files will be indexed. )"; @@ -152,23 +283,63 @@ int main(int argc, const char **argv) { return 1; } - // Collect symbols found in each translation unit, merging as we go. - clang::clangd::IndexFileIn Data; auto Mangler = std::make_shared<clang::clangd::CommandMangler>( clang::clangd::CommandMangler::detect()); Mangler->SystemIncludeExtractor = clang::clangd::getSystemIncludeExtractor( static_cast<llvm::ArrayRef<std::string>>( clang::clangd::QueryDriverGlobs)); + + auto Adjuster = clang::tooling::ArgumentsAdjuster( + [Mangler = std::move(Mangler)](const std::vector<std::string> &Args, + llvm::StringRef File) { + clang::tooling::CompileCommand Cmd; + Cmd.CommandLine = Args; + Mangler->operator()(Cmd, File); + return Cmd.CommandLine; + }); + + // Handle background index format separately - writes per-file shards. + if (clang::clangd::Format == clang::clangd::IndexFileFormat::BACKGROUND) { + // Default to current directory if --project-root not specified. + std::string Root = clang::clangd::ProjectRoot; + if (Root.empty()) { + llvm::SmallString<256> CurrentDir; + if (auto EC = llvm::sys::fs::current_path(CurrentDir)) { + llvm::errs() << "Error: Failed to get current directory: " + << EC.message() << "\n"; + return 1; + } + Root = std::string(CurrentDir); + } + + // Create storage factory for disk-backed index shards. + auto IndexStorageFactory = + clang::clangd::BackgroundIndexStorage::createDiskBackedStorageFactory( + [Root](clang::clangd::PathRef) { + return clang::clangd::ProjectInfo{Root}; + }); + + // Get storage for the project root. + clang::clangd::BackgroundIndexStorage *Storage = IndexStorageFactory(Root); + + auto Err = Executor->get()->execute( + std::make_unique<clang::clangd::BackgroundIndexActionFactory>(*Storage), + std::move(Adjuster)); + if (Err) { + clang::clangd::elog("{0}", std::move(Err)); + return 1; + } + + llvm::errs() << "Background index shards written to " << Root + << "/.cache/clangd/index/\n"; + return 0; + } + + // Standard mode: collect and merge symbols, then emit to stdout. + clang::clangd::IndexFileIn Data; auto Err = Executor->get()->execute( std::make_unique<clang::clangd::IndexActionFactory>(Data), - clang::tooling::ArgumentsAdjuster( - [Mangler = std::move(Mangler)](const std::vector<std::string> &Args, - llvm::StringRef File) { - clang::tooling::CompileCommand Cmd; - Cmd.CommandLine = Args; - Mangler->operator()(Cmd, File); - return Cmd.CommandLine; - })); + std::move(Adjuster)); if (Err) { clang::clangd::elog("{0}", std::move(Err)); } >From 8637465b69b97a1cd1ab1b0debeb509f8a7def2f Mon Sep 17 00:00:00 2001 From: jasonwilliams <[email protected]> Date: Sun, 11 Jan 2026 15:01:40 +0000 Subject: [PATCH 2/2] handle background case for serialization --- clang-tools-extra/clangd/index/Serialization.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f03839599612c..10388b1948f43 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -686,6 +686,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { case IndexFileFormat::YAML: writeYAML(O, OS); break; + case IndexFileFormat::BACKGROUND: + llvm_unreachable("BACKGROUND format not supported for serialization"); } return OS; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
