https://github.com/augusto2112 updated https://github.com/llvm/llvm-project/pull/181952
>From e09ca06c816e716146c6745e95faee6e9fd86503 Mon Sep 17 00:00:00 2001 From: Augusto Noronha <[email protected]> Date: Tue, 17 Feb 2026 17:23:32 -0800 Subject: [PATCH 1/4] [lldb] Speed up SymbolContextList::AppendIfUnique d7fb086668dff68 changed some calls from SymbolContextList::Append to SymbolContextList::AppendIfUnique. This has unfortunately caused a huge slow down in operations involving a large amount of symbol contexts (for example, trying to autocomplete from an empty input "b <TAB>" will add every function to the list), since AppendIfUnique scans the entire symbol context list. Speed this up by adding a hash set to quickly answer whether a symbol context is on the list or not. This takes the time from running "b <TAB>" when debugging yaml2obj on my machine from 600 seconds down to 13, which is about the same as before d7fb086668dff68. Note that AppendIfUnique has a logic error, which has been present since its introduction. This has to do with the behavior controlled by "merge_symbol_into_function", which will try to merge symbols with symbol context containing the equivalent function to that symbol. The previous patch tried to correct this by adding CompareConsideringPossiblyNullSymbol(), which is not quite correct. With CompareConsideringPossiblyNullSymbol(), if symbols are added in this order: - Symbol context without symbol. - Equivalent symbol context with symbol. The list will have only one symbol context WITHOUT the symbol. If we stop using CompareConsideringPossiblyNullSymbol() and instead go back to the == operator which d7fb086668dff68 introduced, with symbols added in this order, the following will happen: - Symbol context without symbol. - Equivalent symbol context with symbol. - The bare symbol, with "merge_symbol_into_function = true", the list will have the same symbol twice. This patch does not attempt to solve this, and instead focuses on the performance issue d7fb086668dff68 introduced. rdar://170477680 --- lldb/include/lldb/Symbol/SymbolContext.h | 15 ++++ lldb/source/Symbol/SymbolContext.cpp | 106 +++++++++++++++++++---- 2 files changed, 104 insertions(+), 17 deletions(-) diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index 0834825cdbd25..f63d23eaefae3 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -19,6 +19,7 @@ #include "lldb/Utility/Iterable.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-private.h" +#include "llvm/ADT/DenseSet.h" namespace lldb_private { @@ -483,6 +484,20 @@ class SymbolContextList { // Member variables. collection m_symbol_contexts; ///< The list of symbol contexts. +private: + struct SymbolContextInfo { + static SymbolContext getEmptyKey(); + static SymbolContext getTombstoneKey(); + static unsigned getHashValue(const SymbolContext &sc); + static bool isEqual(const SymbolContext &lhs, const SymbolContext &rhs); + }; + + /// Threshold above which we switch from linear scan to hash-set lookup + /// for uniqueness checks. + static constexpr size_t kSetThreshold = 1024; + + llvm::SmallDenseSet<SymbolContext, 1, SymbolContextInfo> m_lookup_set; + public: const_iterator begin() const { return m_symbol_contexts.begin(); } const_iterator end() const { return m_symbol_contexts.end(); } diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index ead924afa9fc2..62b4091c6b5fb 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -28,6 +28,8 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" using namespace lldb; using namespace lldb_private; @@ -1191,18 +1193,66 @@ void SymbolContextSpecifier::GetDescription( // SymbolContextList // +// SymbolContextInfo implementations for DenseSet +SymbolContext SymbolContextList::SymbolContextInfo::getEmptyKey() { + SymbolContext sc; + sc.function = llvm::DenseMapInfo<Function *>::getEmptyKey(); + return sc; +} + +SymbolContext SymbolContextList::SymbolContextInfo::getTombstoneKey() { + SymbolContext sc; + sc.function = llvm::DenseMapInfo<Function *>::getTombstoneKey(); + return sc; +} + +unsigned +SymbolContextList::SymbolContextInfo::getHashValue(const SymbolContext &sc) { + // Hash all fields EXCEPT symbol, since CompareConsideringPossiblyNullSymbol + // ignores them. + auto line_entry_hash = + sc.line_entry.IsValid() + ? llvm::hash_combine( + sc.line_entry.range.GetBaseAddress().GetFileAddress(), + sc.line_entry.range.GetByteSize(), + sc.line_entry.is_terminal_entry, sc.line_entry.line, + sc.line_entry.column) + : llvm::hash_value(0); + return static_cast<unsigned>(llvm::hash_combine( + sc.function, sc.module_sp.get(), sc.comp_unit, sc.target_sp.get(), + line_entry_hash, sc.variable, sc.block)); +} + +bool SymbolContextList::SymbolContextInfo::isEqual(const SymbolContext &lhs, + const SymbolContext &rhs) { + // Check for empty/tombstone keys first, since these are invalid pointers we + // don't want to accidentally dereference them in + // CompareConsideringPossiblyNullSymbol. + if (lhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() || + rhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() || + lhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey() || + rhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey()) + return lhs.function == rhs.function; + + return SymbolContext::CompareConsideringPossiblyNullSymbol(lhs, rhs); +} + SymbolContextList::SymbolContextList() : m_symbol_contexts() {} SymbolContextList::~SymbolContextList() = default; void SymbolContextList::Append(const SymbolContext &sc) { m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); } void SymbolContextList::Append(const SymbolContextList &sc_list) { - collection::const_iterator pos, end = sc_list.m_symbol_contexts.end(); - for (pos = sc_list.m_symbol_contexts.begin(); pos != end; ++pos) - m_symbol_contexts.push_back(*pos); + for (const auto &sc : sc_list.m_symbol_contexts) { + m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); + } } uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, @@ -1218,30 +1268,44 @@ uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, bool merge_symbol_into_function) { - collection::iterator pos, end = m_symbol_contexts.end(); - for (pos = m_symbol_contexts.begin(); pos != end; ++pos) { - // Because symbol contexts might first be built without the symbol, - // which is then appended later on, compare the symbol contexts taking into - // accout that one (or either) of them might not have a symbol yet. - if (SymbolContext::CompareConsideringPossiblyNullSymbol(*pos, sc)) + + // Look in the set if it's populated, otherwise, fall back to linear scan. + if (!m_lookup_set.empty()) { + if (m_lookup_set.count(sc)) + return false; + } else if (m_symbol_contexts.size() < kSetThreshold) { + for (const auto &existing : m_symbol_contexts) { + if (SymbolContext::CompareConsideringPossiblyNullSymbol(existing, sc)) + return false; + } + } else { + // We've just crossed the threshold. Populate the set from existing items. + // Since the set won't be empty after this every new append operation will + // keep the set in sync with the vector. + for (const auto &existing : m_symbol_contexts) + m_lookup_set.insert(existing); + if (m_lookup_set.count(sc)) return false; } + + // This path is only taken when sc is an "isolated symbol" (only symbol field + // is set), so it's not the hot path. if (merge_symbol_into_function && sc.symbol != nullptr && sc.comp_unit == nullptr && sc.function == nullptr && sc.block == nullptr && !sc.line_entry.IsValid()) { if (sc.symbol->ValueIsAddress()) { - for (pos = m_symbol_contexts.begin(); pos != end; ++pos) { + for (auto &pos : m_symbol_contexts) { // Don't merge symbols into inlined function symbol contexts - if (pos->block && pos->block->GetContainingInlinedBlock()) + if (pos.block && pos.block->GetContainingInlinedBlock()) continue; - if (pos->function) { - if (pos->function->GetAddress() == sc.symbol->GetAddressRef()) { + if (pos.function) { + if (pos.function->GetAddress() == sc.symbol->GetAddressRef()) { // Do we already have a function with this symbol? - if (pos->symbol == sc.symbol) + if (pos.symbol == sc.symbol) return false; - if (pos->symbol == nullptr) { - pos->symbol = sc.symbol; + if (pos.symbol == nullptr) { + pos.symbol = sc.symbol; return false; } } @@ -1249,11 +1313,17 @@ bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, } } } + m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); return true; } -void SymbolContextList::Clear() { m_symbol_contexts.clear(); } +void SymbolContextList::Clear() { + m_symbol_contexts.clear(); + m_lookup_set.clear(); +} void SymbolContextList::Dump(Stream *s, Target *target) const { @@ -1281,6 +1351,8 @@ bool SymbolContextList::GetContextAtIndex(size_t idx, SymbolContext &sc) const { bool SymbolContextList::RemoveContextAtIndex(size_t idx) { if (idx < m_symbol_contexts.size()) { + if (!m_lookup_set.empty()) + m_lookup_set.erase(m_symbol_contexts[idx]); m_symbol_contexts.erase(m_symbol_contexts.begin() + idx); return true; } >From 44781dbea9bc84ea9eaae97f1b1f72e103bef779 Mon Sep 17 00:00:00 2001 From: Augusto Noronha <[email protected]> Date: Thu, 19 Feb 2026 07:53:16 -0800 Subject: [PATCH 2/4] Use SetVector instead --- lldb/include/lldb/Symbol/SymbolContext.h | 34 +++++++------- lldb/source/Core/Module.cpp | 2 +- lldb/source/Symbol/SymbolContext.cpp | 60 ++++++------------------ 3 files changed, 33 insertions(+), 63 deletions(-) diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index f63d23eaefae3..12a1726a59e7d 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -19,7 +19,7 @@ #include "lldb/Utility/Iterable.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-private.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" namespace lldb_private { @@ -445,7 +445,7 @@ class SymbolContextList { /// otherwise. bool GetContextAtIndex(size_t idx, SymbolContext &sc) const; - /// Direct reference accessor for a symbol context at index \a idx. + /// Direct const reference accessor for a symbol context at index \a idx. /// /// The index \a idx must be a valid index, no error checking will be done /// to ensure that it is valid. @@ -454,13 +454,20 @@ class SymbolContextList { /// The zero based index into the symbol context list. /// /// \return - /// A const reference to the symbol context to fill in. - SymbolContext &operator[](size_t idx) { return m_symbol_contexts[idx]; } - + /// A const reference to the symbol context. const SymbolContext &operator[](size_t idx) const { return m_symbol_contexts[idx]; } + /// Replace the symbol in the symbol context at index \a idx. + /// + /// The symbol field is excluded from the hash and equality used by the + /// internal set, so this is the only mutation that is safe to perform on + /// an element that is already in the list. + void SetSymbolAtIndex(size_t idx, Symbol *symbol) { + const_cast<SymbolContext &>(m_symbol_contexts[idx]).symbol = symbol; + } + bool RemoveContextAtIndex(size_t idx); /// Get accessor for a symbol context list size. @@ -476,14 +483,6 @@ class SymbolContextList { void GetDescription(Stream *s, lldb::DescriptionLevel level, Target *target) const; -protected: - typedef std::vector<SymbolContext> - collection; ///< The collection type for the list. - typedef collection::const_iterator const_iterator; - - // Member variables. - collection m_symbol_contexts; ///< The list of symbol contexts. - private: struct SymbolContextInfo { static SymbolContext getEmptyKey(); @@ -492,11 +491,12 @@ class SymbolContextList { static bool isEqual(const SymbolContext &lhs, const SymbolContext &rhs); }; - /// Threshold above which we switch from linear scan to hash-set lookup - /// for uniqueness checks. - static constexpr size_t kSetThreshold = 1024; + using set_type = llvm::DenseSet<SymbolContext, SymbolContextInfo>; + using collection = + llvm::SetVector<SymbolContext, std::vector<SymbolContext>, set_type>; - llvm::SmallDenseSet<SymbolContext, 1, SymbolContextInfo> m_lookup_set; + // Member variables. + collection m_symbol_contexts; ///< The list of symbol contexts. public: const_iterator begin() const { return m_symbol_contexts.begin(); } diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index fc17daf86a901..9cd4e82790d4f 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -929,7 +929,7 @@ void Module::FindFunctions(const RegularExpression ®ex, if (pos == end) sc_list.Append(sc); else - sc_list[pos->second].symbol = sc.symbol; + sc_list.SetSymbolAtIndex(pos->second, sc.symbol); } } } diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 62b4091c6b5fb..789f9a901a7f6 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -1242,25 +1242,19 @@ SymbolContextList::SymbolContextList() : m_symbol_contexts() {} SymbolContextList::~SymbolContextList() = default; void SymbolContextList::Append(const SymbolContext &sc) { - m_symbol_contexts.push_back(sc); - if (!m_lookup_set.empty()) - m_lookup_set.insert(sc); + m_symbol_contexts.insert(sc); } void SymbolContextList::Append(const SymbolContextList &sc_list) { - for (const auto &sc : sc_list.m_symbol_contexts) { - m_symbol_contexts.push_back(sc); - if (!m_lookup_set.empty()) - m_lookup_set.insert(sc); - } + for (const auto &sc : sc_list.m_symbol_contexts) + m_symbol_contexts.insert(sc); } uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, bool merge_symbol_into_function) { uint32_t unique_sc_add_count = 0; - collection::const_iterator pos, end = sc_list.m_symbol_contexts.end(); - for (pos = sc_list.m_symbol_contexts.begin(); pos != end; ++pos) { - if (AppendIfUnique(*pos, merge_symbol_into_function)) + for (const auto &sc : sc_list.m_symbol_contexts) { + if (AppendIfUnique(sc, merge_symbol_into_function)) ++unique_sc_add_count; } return unique_sc_add_count; @@ -1268,25 +1262,8 @@ uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, bool merge_symbol_into_function) { - - // Look in the set if it's populated, otherwise, fall back to linear scan. - if (!m_lookup_set.empty()) { - if (m_lookup_set.count(sc)) - return false; - } else if (m_symbol_contexts.size() < kSetThreshold) { - for (const auto &existing : m_symbol_contexts) { - if (SymbolContext::CompareConsideringPossiblyNullSymbol(existing, sc)) - return false; - } - } else { - // We've just crossed the threshold. Populate the set from existing items. - // Since the set won't be empty after this every new append operation will - // keep the set in sync with the vector. - for (const auto &existing : m_symbol_contexts) - m_lookup_set.insert(existing); - if (m_lookup_set.count(sc)) - return false; - } + if (m_symbol_contexts.contains(sc)) + return false; // This path is only taken when sc is an "isolated symbol" (only symbol field // is set), so it's not the hot path. @@ -1294,7 +1271,8 @@ bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, sc.comp_unit == nullptr && sc.function == nullptr && sc.block == nullptr && !sc.line_entry.IsValid()) { if (sc.symbol->ValueIsAddress()) { - for (auto &pos : m_symbol_contexts) { + for (size_t i = 0, e = m_symbol_contexts.size(); i != e; ++i) { + const auto &pos = m_symbol_contexts[i]; // Don't merge symbols into inlined function symbol contexts if (pos.block && pos.block->GetContainingInlinedBlock()) continue; @@ -1305,7 +1283,7 @@ bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, if (pos.symbol == sc.symbol) return false; if (pos.symbol == nullptr) { - pos.symbol = sc.symbol; + SetSymbolAtIndex(i, sc.symbol); return false; } } @@ -1314,16 +1292,11 @@ bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, } } - m_symbol_contexts.push_back(sc); - if (!m_lookup_set.empty()) - m_lookup_set.insert(sc); + m_symbol_contexts.insert(sc); return true; } -void SymbolContextList::Clear() { - m_symbol_contexts.clear(); - m_lookup_set.clear(); -} +void SymbolContextList::Clear() { m_symbol_contexts.clear(); } void SymbolContextList::Dump(Stream *s, Target *target) const { @@ -1333,10 +1306,9 @@ void SymbolContextList::Dump(Stream *s, Target *target) const { s->EOL(); s->IndentMore(); - collection::const_iterator pos, end = m_symbol_contexts.end(); - for (pos = m_symbol_contexts.begin(); pos != end; ++pos) { - // pos->Dump(s, target); - pos->GetDescription(s, eDescriptionLevelVerbose, target); + for (const auto &sc : m_symbol_contexts) { + // sc.Dump(s, target); + sc.GetDescription(s, eDescriptionLevelVerbose, target); } s->IndentLess(); } @@ -1351,8 +1323,6 @@ bool SymbolContextList::GetContextAtIndex(size_t idx, SymbolContext &sc) const { bool SymbolContextList::RemoveContextAtIndex(size_t idx) { if (idx < m_symbol_contexts.size()) { - if (!m_lookup_set.empty()) - m_lookup_set.erase(m_symbol_contexts[idx]); m_symbol_contexts.erase(m_symbol_contexts.begin() + idx); return true; } >From 6415e9dbdb2bf6b2c7b86d1249ea1c8b0b0ed1d4 Mon Sep 17 00:00:00 2001 From: Augusto Noronha <[email protected]> Date: Thu, 19 Feb 2026 07:55:11 -0800 Subject: [PATCH 3/4] Remove commented out Dump call --- lldb/source/Symbol/SymbolContext.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 789f9a901a7f6..62c53047fc23c 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -1307,7 +1307,6 @@ void SymbolContextList::Dump(Stream *s, Target *target) const { s->IndentMore(); for (const auto &sc : m_symbol_contexts) { - // sc.Dump(s, target); sc.GetDescription(s, eDescriptionLevelVerbose, target); } s->IndentLess(); >From 9e489cf6d43940c3b9be80c6621e7fd9f1d3e2b5 Mon Sep 17 00:00:00 2001 From: Augusto Noronha <[email protected]> Date: Thu, 19 Feb 2026 08:11:42 -0800 Subject: [PATCH 4/4] Add using const iterator --- lldb/include/lldb/Symbol/SymbolContext.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index 12a1726a59e7d..44cb364337263 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -494,6 +494,7 @@ class SymbolContextList { using set_type = llvm::DenseSet<SymbolContext, SymbolContextInfo>; using collection = llvm::SetVector<SymbolContext, std::vector<SymbolContext>, set_type>; + using const_iterator = collection::const_iterator; // Member variables. collection m_symbol_contexts; ///< The list of symbol contexts. @@ -502,10 +503,10 @@ class SymbolContextList { const_iterator begin() const { return m_symbol_contexts.begin(); } const_iterator end() const { return m_symbol_contexts.end(); } - typedef llvm::iterator_range<collection::const_iterator> - SymbolContextIterable; + typedef llvm::iterator_range<const_iterator> SymbolContextIterable; SymbolContextIterable SymbolContexts() { - return SymbolContextIterable(m_symbol_contexts); + return SymbolContextIterable(m_symbol_contexts.begin(), + m_symbol_contexts.end()); } }; _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
