https://github.com/JVApen created 
https://github.com/llvm/llvm-project/pull/156185

Fixes clangd/clangd#2122

Use an alternative builder implementation in cases were no duplicate entries 
are expected. In practice, this is when loading a previously created index as 
the deduplicaton happened while making that one. By using a simple std::vector 
over a DenseSet, we don't have the overhead of keeping all entries sorted.

For our use-case, this reduces the total loading time from 1 minute 15 seconds 
to 1 minute and reduces the CPU seconds from 50 to 30.

>From 14eed6410c1183f67974c62cf9ca0674e0a477af Mon Sep 17 00:00:00 2001
From: JVApen <vanantwerpenjer...@gmail.com>
Date: Sat, 30 Aug 2025 16:29:54 +0200
Subject: [PATCH] Fixes clangd/clangd#2122

Use an alternative builder implementation in cases were no duplicate entries 
are expected.
In practice, this is when loading a previously created index as the 
deduplicaton happened while making that one.
By using a simple std::vector over a DenseSet, we don't have the overhead of 
keeping all entries sorted.

For our use-case, this reduces the total loading time from 1 minute 15 seconds 
to 1 minute and reduces the CPU seconds from 50 to 30.
---
 clang-tools-extra/clangd/index/FileIndex.cpp  |  2 +-
 clang-tools-extra/clangd/index/Ref.cpp        | 27 +++++++++++++++
 clang-tools-extra/clangd/index/Ref.h          | 33 +++++++++++++++++++
 .../clangd/index/Serialization.cpp            |  2 +-
 .../clangd/index/YAMLSerialization.cpp        |  2 +-
 5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp 
b/clang-tools-extra/clangd/index/FileIndex.cpp
index c49de377d54ca..8fdcfce06656b 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -204,7 +204,7 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const {
     SymB.insert(*S);
   IF.Symbols = std::move(SymB).build();
 
-  RefSlab::Builder RefB;
+  RefSlab::BuilderExpectUnique RefB;
   for (const auto *Ref : It->getValue().Refs) {
     auto SID = RefToSymID.lookup(Ref);
     RefB.insert(SID, *Ref);
diff --git a/clang-tools-extra/clangd/index/Ref.cpp 
b/clang-tools-extra/clangd/index/Ref.cpp
index e622a3f78e42e..5640199e45175 100644
--- a/clang-tools-extra/clangd/index/Ref.cpp
+++ b/clang-tools-extra/clangd/index/Ref.cpp
@@ -64,5 +64,32 @@ RefSlab RefSlab::Builder::build() && {
   return RefSlab(std::move(Result), std::move(Arena), Entries.size());
 }
 
+void RefSlab::BuilderExpectUnique::insert(const SymbolID &ID, const Ref &S) {
+  Entry E = {ID, S};
+  E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data();
+  Entries.emplace_back(std::move(E));
+}
+
+RefSlab RefSlab::BuilderExpectUnique::build() && {
+  std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result;
+  // We'll reuse the arena, as it only has unique strings and we need them all.
+  // We need to group refs by symbol and form contiguous arrays on the arena.
+  // Group by SymbolID.
+  std::sort(Entries.begin(), Entries.end());
+  Entries.erase(std::unique(Entries.begin(), Entries.end()), Entries.end());
+  std::vector<Ref> Refs;
+  // Loop over symbols, copying refs for each onto the arena.
+  for (auto I = Entries.begin(), End = Entries.end(); I != End;) {
+    SymbolID Sym = I->Symbol;
+    Refs.clear();
+    do {
+      Refs.push_back(I->Reference);
+      ++I;
+    } while (I != End && I->Symbol == Sym);
+    Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena));
+  }
+  return RefSlab(std::move(Result), std::move(Arena), Entries.size());
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/index/Ref.h 
b/clang-tools-extra/clangd/index/Ref.h
index 870f77f56e6cb..9a7e7aadba9f5 100644
--- a/clang-tools-extra/clangd/index/Ref.h
+++ b/clang-tools-extra/clangd/index/Ref.h
@@ -132,6 +132,8 @@ class RefSlab {
   }
 
   /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab.
+  /// This variant is optimized to receive duplicate symbols.
+  /// Use this when parsing files
   class Builder {
   public:
     Builder() : UniqueStrings(Arena) {}
@@ -154,6 +156,37 @@ class RefSlab {
     llvm::DenseSet<Entry> Entries;
   };
 
+  /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab.
+  /// This variant is optimized to receive unique symbols
+  /// Use this when reading symbols which where previously written in a file.
+  class BuilderExpectUnique {
+  public:
+    BuilderExpectUnique() : UniqueStrings(Arena) {}
+    /// Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
+    void insert(const SymbolID &ID, const Ref &S);
+    /// Consumes the builder to finalize the slab.
+    RefSlab build() &&;
+  private:
+    // A ref we're storing with its symbol to consume with build().
+    // All strings are interned, so DenseMapInfo can use pointer comparisons.
+    struct Entry {
+      SymbolID Symbol;
+      Ref Reference;
+      friend bool operator<(const Entry &L, const Entry &R) noexcept {
+        return std::tie(L.Symbol, L.Reference) <
+               std::tie(R.Symbol, R.Reference);
+      }
+      friend bool operator==(const Entry &L, const Entry &R) noexcept {
+        return std::tie(L.Symbol, L.Reference) ==
+               std::tie(R.Symbol, R.Reference);
+      }
+    };
+    friend struct llvm::DenseMapInfo<Entry>;
+    llvm::BumpPtrAllocator Arena;
+    llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
+    std::vector<Entry> Entries;
+  };
+
 private:
   RefSlab(std::vector<value_type> Refs, llvm::BumpPtrAllocator Arena,
           size_t NumRefs)
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp 
b/clang-tools-extra/clangd/index/Serialization.cpp
index f03839599612c..d21e16c7a33dd 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -516,7 +516,7 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
   }
   if (Chunks.count("refs")) {
     Reader RefsReader(Chunks.lookup("refs"));
-    RefSlab::Builder Refs;
+    RefSlab::BuilderExpectUnique Refs;
     while (!RefsReader.eof()) {
       auto RefsBundle = readRefs(RefsReader, Strings->Strings);
       for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp 
b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
index 495d8a2ff487a..eb90f167c1b51 100644
--- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp
+++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
@@ -473,7 +473,7 @@ void writeYAML(const IndexFileOut &O, llvm::raw_ostream 
&OS) {
 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data,
                                      SymbolOrigin Origin) {
   SymbolSlab::Builder Symbols;
-  RefSlab::Builder Refs;
+  RefSlab::BuilderExpectUnique Refs;
   RelationSlab::Builder Relations;
   llvm::BumpPtrAllocator
       Arena; // store the underlying data of Position::FileURI.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to