https://github.com/dzbarsky created https://github.com/llvm/llvm-project/pull/202663
StandardLibrary.cpp emits the generated C and C++ symbol mappings once to initialize SymbolHeaderMapping and a second time to count unique symbols. The second expansion costs about 46 KiB of read-only data and constructs a DenseSet during initialization. Return one SymbolMapping array per language and count the already-grouped qualified names with a linear pass before initializing the mapping from the same array. This removes the duplicate generated tables and the temporary DenseSet without changing the lookup representation. In an arm64 Release build, StandardLibrary.cpp.o loadable contents decrease from 272,536 to 224,729 bytes (-47,807). In the LLVM 22 Bazel build, clangd decreases from 52,672,672 to 52,622,704 bytes (-49,968) and stripped clangd decreases from 40,952,336 to 40,902,784 bytes (-49,552). The multicall binary decreases from 162,148,880 to 162,098,928 bytes (-49,952) and its stripped form decreases from 132,207,344 to 132,157,792 bytes (-49,552). A 50-batch initialization benchmark measured -1.16% elapsed initialization time with a 95% bootstrap interval of -3.70% to +1.68%. A 300-process clangd --check benchmark measured -1.17% CPU with a 95% bootstrap interval of -2.64% to -0.00%. All six existing StdlibTest cases pass, covering C and C++ mappings, experimental symbols, recognizer behavior, and special C mappings. Work towards #202616 >From c5a8f493ce5edb01ec5b0d4fa9a41d83386644a5 Mon Sep 17 00:00:00 2001 From: David Zbarsky <[email protected]> Date: Tue, 9 Jun 2026 04:54:39 -0400 Subject: [PATCH] [clang][Tooling] Reuse standard-library symbol descriptors StandardLibrary.cpp emits the generated C and C++ symbol mappings once to initialize SymbolHeaderMapping and a second time to count unique symbols. The second expansion costs about 46 KiB of read-only data and constructs a DenseSet during initialization. Return one SymbolMapping array per language and count the already-grouped qualified names with a linear pass before initializing the mapping from the same array. This removes the duplicate generated tables and the temporary DenseSet without changing the lookup representation. In an arm64 Release build, StandardLibrary.cpp.o loadable contents decrease from 272,536 to 224,729 bytes (-47,807). In the LLVM 22 Bazel build, clangd decreases from 52,672,672 to 52,622,704 bytes (-49,968) and stripped clangd decreases from 40,952,336 to 40,902,784 bytes (-49,552). The multicall binary decreases from 162,148,880 to 162,098,928 bytes (-49,952) and its stripped form decreases from 132,207,344 to 132,157,792 bytes (-49,552). A 50-batch initialization benchmark measured -1.16% elapsed initialization time with a 95% bootstrap interval of -3.70% to +1.68%. A 300-process clangd --check benchmark measured -1.17% CPU with a 95% bootstrap interval of -2.64% to -0.00%. All six existing StdlibTest cases pass, covering C and C++ mappings, experimental symbols, recognizer behavior, and special C mappings. --- .../Inclusions/Stdlib/StandardLibrary.cpp | 75 ++++++++----------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp index 807a8d8a34ad7..e24b934e01071 100644 --- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp +++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp @@ -10,10 +10,10 @@ #include "clang/AST/Decl.h" #include "clang/Basic/LangOptions.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include <optional> namespace clang { @@ -24,6 +24,12 @@ namespace { // Symbol name -> Symbol::ID, within a namespace. using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>; +struct SymbolMapping { + const char *QName; + unsigned NSLen; + const char *HeaderName; +}; + // A Mapping per language. struct SymbolHeaderMapping { llvm::StringRef *HeaderNames = nullptr; @@ -54,37 +60,50 @@ static const SymbolHeaderMapping *getMappingPerLang(Lang L) { return LanguageMappings[static_cast<unsigned>(L)]; } -static int countSymbols(Lang Language) { - ArrayRef<const char *> Symbols; -#define SYMBOL(Name, NS, Header) #NS #Name, +static ArrayRef<SymbolMapping> getSymbolMappings(Lang Language) { +#define SYMBOL(Name, NS, Header) \ + {#NS #Name, \ + static_cast<decltype(SymbolMapping::NSLen)>(StringRef(#NS).size()), \ + #Header}, switch (Language) { case Lang::C: { - static constexpr const char *CSymbols[] = { + static constexpr SymbolMapping CSymbols[] = { #include "CSpecialSymbolMap.inc" #include "CSymbolMap.inc" }; - Symbols = CSymbols; - break; + return CSymbols; } case Lang::CXX: { - static constexpr const char *CXXSymbols[] = { + static constexpr SymbolMapping CXXSymbols[] = { #include "StdSpecialSymbolMap.inc" #include "StdSymbolMap.inc" #include "StdTsSymbolMap.inc" }; - Symbols = CXXSymbols; - break; + return CXXSymbols; } } #undef SYMBOL - return llvm::DenseSet<StringRef>(llvm::from_range, Symbols).size(); + llvm_unreachable("unknown language"); +} + +static unsigned countSymbols(ArrayRef<SymbolMapping> Symbols) { + unsigned Count = 0; + StringRef Previous; + for (const SymbolMapping &S : Symbols) { + if (Previous != S.QName) { + ++Count; + Previous = S.QName; + } + } + return Count; } static int initialize(Lang Language) { SymbolHeaderMapping *Mapping = new SymbolHeaderMapping(); LanguageMappings[static_cast<unsigned>(Language)] = Mapping; - unsigned SymCount = countSymbols(Language); + ArrayRef<SymbolMapping> Symbols = getSymbolMappings(Language); + unsigned SymCount = countSymbols(Symbols); Mapping->SymbolCount = SymCount; Mapping->SymbolNames = new std::remove_reference_t<decltype(*Mapping->SymbolNames)>[SymCount]; @@ -137,36 +156,8 @@ static int initialize(Lang Language) { NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex); }; - struct Symbol { - const char *QName; - unsigned NSLen; - const char *HeaderName; - }; -#define SYMBOL(Name, NS, Header) \ - {#NS #Name, static_cast<decltype(Symbol::NSLen)>(StringRef(#NS).size()), \ - #Header}, - switch (Language) { - case Lang::C: { - static constexpr Symbol CSymbols[] = { -#include "CSpecialSymbolMap.inc" -#include "CSymbolMap.inc" - }; - for (const Symbol &S : CSymbols) - Add(S.QName, S.NSLen, S.HeaderName); - break; - } - case Lang::CXX: { - static constexpr Symbol CXXSymbols[] = { -#include "StdSpecialSymbolMap.inc" -#include "StdSymbolMap.inc" -#include "StdTsSymbolMap.inc" - }; - for (const Symbol &S : CXXSymbols) - Add(S.QName, S.NSLen, S.HeaderName); - break; - } - } -#undef SYMBOL + for (const SymbolMapping &S : Symbols) + Add(S.QName, S.NSLen, S.HeaderName); Mapping->HeaderNames = new llvm::StringRef[Mapping->HeaderIDs->size()]; for (const auto &E : *Mapping->HeaderIDs) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
