https://github.com/JVApen created https://github.com/llvm/llvm-project/pull/156185
Fixes clangd/clangd#2122 Use an alternative builder implementation in cases were no duplicate entries are expected. In practice, this is when loading a previously created index as the deduplicaton happened while making that one. By using a simple std::vector over a DenseSet, we don't have the overhead of keeping all entries sorted. For our use-case, this reduces the total loading time from 1 minute 15 seconds to 1 minute and reduces the CPU seconds from 50 to 30. >From 14eed6410c1183f67974c62cf9ca0674e0a477af Mon Sep 17 00:00:00 2001 From: JVApen <vanantwerpenjer...@gmail.com> Date: Sat, 30 Aug 2025 16:29:54 +0200 Subject: [PATCH] Fixes clangd/clangd#2122 Use an alternative builder implementation in cases were no duplicate entries are expected. In practice, this is when loading a previously created index as the deduplicaton happened while making that one. By using a simple std::vector over a DenseSet, we don't have the overhead of keeping all entries sorted. For our use-case, this reduces the total loading time from 1 minute 15 seconds to 1 minute and reduces the CPU seconds from 50 to 30. --- clang-tools-extra/clangd/index/FileIndex.cpp | 2 +- clang-tools-extra/clangd/index/Ref.cpp | 27 +++++++++++++++ clang-tools-extra/clangd/index/Ref.h | 33 +++++++++++++++++++ .../clangd/index/Serialization.cpp | 2 +- .../clangd/index/YAMLSerialization.cpp | 2 +- 5 files changed, 63 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index c49de377d54ca..8fdcfce06656b 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -204,7 +204,7 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const { SymB.insert(*S); IF.Symbols = std::move(SymB).build(); - RefSlab::Builder RefB; + RefSlab::BuilderExpectUnique RefB; for (const auto *Ref : It->getValue().Refs) { auto SID = RefToSymID.lookup(Ref); RefB.insert(SID, *Ref); diff --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp index e622a3f78e42e..5640199e45175 100644 --- a/clang-tools-extra/clangd/index/Ref.cpp +++ b/clang-tools-extra/clangd/index/Ref.cpp @@ -64,5 +64,32 @@ RefSlab RefSlab::Builder::build() && { return RefSlab(std::move(Result), std::move(Arena), Entries.size()); } +void RefSlab::BuilderExpectUnique::insert(const SymbolID &ID, const Ref &S) { + Entry E = {ID, S}; + E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data(); + Entries.emplace_back(std::move(E)); +} + +RefSlab RefSlab::BuilderExpectUnique::build() && { + std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result; + // We'll reuse the arena, as it only has unique strings and we need them all. + // We need to group refs by symbol and form contiguous arrays on the arena. + // Group by SymbolID. + std::sort(Entries.begin(), Entries.end()); + Entries.erase(std::unique(Entries.begin(), Entries.end()), Entries.end()); + std::vector<Ref> Refs; + // Loop over symbols, copying refs for each onto the arena. + for (auto I = Entries.begin(), End = Entries.end(); I != End;) { + SymbolID Sym = I->Symbol; + Refs.clear(); + do { + Refs.push_back(I->Reference); + ++I; + } while (I != End && I->Symbol == Sym); + Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena)); + } + return RefSlab(std::move(Result), std::move(Arena), Entries.size()); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h index 870f77f56e6cb..9a7e7aadba9f5 100644 --- a/clang-tools-extra/clangd/index/Ref.h +++ b/clang-tools-extra/clangd/index/Ref.h @@ -132,6 +132,8 @@ class RefSlab { } /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab. + /// This variant is optimized to receive duplicate symbols. + /// Use this when parsing files class Builder { public: Builder() : UniqueStrings(Arena) {} @@ -154,6 +156,37 @@ class RefSlab { llvm::DenseSet<Entry> Entries; }; + /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab. + /// This variant is optimized to receive unique symbols + /// Use this when reading symbols which where previously written in a file. + class BuilderExpectUnique { + public: + BuilderExpectUnique() : UniqueStrings(Arena) {} + /// Adds a ref to the slab. Deep copy: Strings will be owned by the slab. + void insert(const SymbolID &ID, const Ref &S); + /// Consumes the builder to finalize the slab. + RefSlab build() &&; + private: + // A ref we're storing with its symbol to consume with build(). + // All strings are interned, so DenseMapInfo can use pointer comparisons. + struct Entry { + SymbolID Symbol; + Ref Reference; + friend bool operator<(const Entry &L, const Entry &R) noexcept { + return std::tie(L.Symbol, L.Reference) < + std::tie(R.Symbol, R.Reference); + } + friend bool operator==(const Entry &L, const Entry &R) noexcept { + return std::tie(L.Symbol, L.Reference) == + std::tie(R.Symbol, R.Reference); + } + }; + friend struct llvm::DenseMapInfo<Entry>; + llvm::BumpPtrAllocator Arena; + llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. + std::vector<Entry> Entries; + }; + private: RefSlab(std::vector<value_type> Refs, llvm::BumpPtrAllocator Arena, size_t NumRefs) diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f03839599612c..d21e16c7a33dd 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -516,7 +516,7 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, } if (Chunks.count("refs")) { Reader RefsReader(Chunks.lookup("refs")); - RefSlab::Builder Refs; + RefSlab::BuilderExpectUnique Refs; while (!RefsReader.eof()) { auto RefsBundle = readRefs(RefsReader, Strings->Strings); for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert? diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp index 495d8a2ff487a..eb90f167c1b51 100644 --- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp +++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp @@ -473,7 +473,7 @@ void writeYAML(const IndexFileOut &O, llvm::raw_ostream &OS) { llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data, SymbolOrigin Origin) { SymbolSlab::Builder Symbols; - RefSlab::Builder Refs; + RefSlab::BuilderExpectUnique Refs; RelationSlab::Builder Relations; llvm::BumpPtrAllocator Arena; // store the underlying data of Position::FileURI. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits