https://github.com/augusto2112 created https://github.com/llvm/llvm-project/pull/181952
d7fb086668dff68 changed some calls from SymbolContextList::Append to SymbolContextList::AppendIfUnique. This has unfortunately caused a huge slow down in operations involving a large amount of symbol contexts (for example, trying to autocomplete from an empty input "b <TAB>" will add every function to the list), since AppendIfUnique scans the entire symbol context list. Speed this up by adding a hash set to quickly answer whether a symbol context is on the list or not. This takes the time from running "b <TAB>" when debugging yaml2obj on my machine from 600 seconds down to 13, which is about the same as before d7fb086668dff68. Note that AppendIfUnique has a logic error, which has been present since its introduction. This has to do with the behavior controlled by "merge_symbol_into_function", which will try to merge symbols with symbol context containing the equivalent function to that symbol. The previous patch tried to correct this by adding CompareConsideringPossiblyNullSymbol(), which is not quite correct. With CompareConsideringPossiblyNullSymbol(), if symbols are added in this order: - Symbol context without symbol. - Equivalent symbol context with symbol. The list will have only one symbol context WITHOUT the symbol. If we stop using CompareConsideringPossiblyNullSymbol() and instead go back to the == operator which d7fb086668dff68 introduced, with symbols added in this order, the following will happen: - Symbol context without symbol. - Equivalent symbol context with symbol. - The bare symbol, with "merge_symbol_into_function = true", the list will have the same symbol twice. This patch does not attempt to solve this, and instead focuses on the performance issue d7fb086668dff68 introduced. rdar://170477680 >From dfa58acd83dde0285c1b9fdb3863fc6ec187bdf5 Mon Sep 17 00:00:00 2001 From: Augusto Noronha <[email protected]> Date: Tue, 17 Feb 2026 17:23:32 -0800 Subject: [PATCH] [lldb] Speed up SymbolContextList::AppendIfUnique d7fb086668dff68 changed some calls from SymbolContextList::Append to SymbolContextList::AppendIfUnique. This has unfortunately caused a huge slow down in operations involving a large amount of symbol contexts (for example, trying to autocomplete from an empty input "b <TAB>" will add every function to the list), since AppendIfUnique scans the entire symbol context list. Speed this up by adding a hash set to quickly answer whether a symbol context is on the list or not. This takes the time from running "b <TAB>" when debugging yaml2obj on my machine from 600 seconds down to 13, which is about the same as before d7fb086668dff68. Note that AppendIfUnique has a logic error, which has been present since its introduction. This has to do with the behavior controlled by "merge_symbol_into_function", which will try to merge symbols with symbol context containing the equivalent function to that symbol. The previous patch tried to correct this by adding CompareConsideringPossiblyNullSymbol(), which is not quite correct. With CompareConsideringPossiblyNullSymbol(), if symbols are added in this order: - Symbol context without symbol. - Equivalent symbol context with symbol. The list will have only one symbol context WITHOUT the symbol. If we stop using CompareConsideringPossiblyNullSymbol() and instead go back to the == operator which d7fb086668dff68 introduced, with symbols added in this order, the following will happen: - Symbol context without symbol. - Equivalent symbol context with symbol. - The bare symbol, with "merge_symbol_into_function = true", the list will have the same symbol twice. This patch does not attempt to solve this, and instead focuses on the performance issue d7fb086668dff68 introduced. rdar://170477680 --- lldb/include/lldb/Symbol/SymbolContext.h | 15 ++++ lldb/source/Symbol/SymbolContext.cpp | 105 +++++++++++++++++++---- 2 files changed, 103 insertions(+), 17 deletions(-) diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index 0834825cdbd25..f63d23eaefae3 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -19,6 +19,7 @@ #include "lldb/Utility/Iterable.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-private.h" +#include "llvm/ADT/DenseSet.h" namespace lldb_private { @@ -483,6 +484,20 @@ class SymbolContextList { // Member variables. collection m_symbol_contexts; ///< The list of symbol contexts. +private: + struct SymbolContextInfo { + static SymbolContext getEmptyKey(); + static SymbolContext getTombstoneKey(); + static unsigned getHashValue(const SymbolContext &sc); + static bool isEqual(const SymbolContext &lhs, const SymbolContext &rhs); + }; + + /// Threshold above which we switch from linear scan to hash-set lookup + /// for uniqueness checks. + static constexpr size_t kSetThreshold = 1024; + + llvm::SmallDenseSet<SymbolContext, 1, SymbolContextInfo> m_lookup_set; + public: const_iterator begin() const { return m_symbol_contexts.begin(); } const_iterator end() const { return m_symbol_contexts.end(); } diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index ead924afa9fc2..637765b775acf 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -28,6 +28,8 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" using namespace lldb; using namespace lldb_private; @@ -1191,18 +1193,65 @@ void SymbolContextSpecifier::GetDescription( // SymbolContextList // +// SymbolContextInfo implementations for DenseSet +SymbolContext SymbolContextList::SymbolContextInfo::getEmptyKey() { + SymbolContext sc; + sc.function = llvm::DenseMapInfo<Function *>::getEmptyKey(); + return sc; +} + +SymbolContext SymbolContextList::SymbolContextInfo::getTombstoneKey() { + SymbolContext sc; + sc.function = llvm::DenseMapInfo<Function *>::getTombstoneKey(); + return sc; +} + +unsigned +SymbolContextList::SymbolContextInfo::getHashValue(const SymbolContext &sc) { + // Hash all fields EXCEPT symbol, since CompareConsideringPossiblyNullSymbol + // ignores them. + auto line_entry_hash = sc.line_entry.IsValid() + ? llvm::hash_combine( + sc.line_entry.range.GetBaseAddress().GetFileAddress(), + sc.line_entry.range.GetByteSize(), + sc.line_entry.is_terminal_entry, + sc.line_entry.line, + sc.line_entry.column) + : llvm::hash_value(0); + return static_cast<unsigned>(llvm::hash_combine( + sc.function, sc.module_sp.get(), sc.comp_unit, sc.target_sp.get(), + line_entry_hash, sc.variable, sc.block)); +} + +bool SymbolContextList::SymbolContextInfo::isEqual(const SymbolContext &lhs, + const SymbolContext &rhs) { + // Check for empty/tombstone keys first, since these are invalid pointers we don't + // want to accidentally dereference them in CompareConsideringPossiblyNullSymbol. + if (lhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() || + rhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() || + lhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey() || + rhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey()) + return lhs.function == rhs.function; + + return SymbolContext::CompareConsideringPossiblyNullSymbol(lhs, rhs); +} + SymbolContextList::SymbolContextList() : m_symbol_contexts() {} SymbolContextList::~SymbolContextList() = default; void SymbolContextList::Append(const SymbolContext &sc) { m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); } void SymbolContextList::Append(const SymbolContextList &sc_list) { - collection::const_iterator pos, end = sc_list.m_symbol_contexts.end(); - for (pos = sc_list.m_symbol_contexts.begin(); pos != end; ++pos) - m_symbol_contexts.push_back(*pos); + for (const auto &sc : sc_list.m_symbol_contexts) { + m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); + } } uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, @@ -1218,30 +1267,44 @@ uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list, bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, bool merge_symbol_into_function) { - collection::iterator pos, end = m_symbol_contexts.end(); - for (pos = m_symbol_contexts.begin(); pos != end; ++pos) { - // Because symbol contexts might first be built without the symbol, - // which is then appended later on, compare the symbol contexts taking into - // accout that one (or either) of them might not have a symbol yet. - if (SymbolContext::CompareConsideringPossiblyNullSymbol(*pos, sc)) + + // Look in the set if it's populated, otherwise, fall back to linear scan. + if (!m_lookup_set.empty()) { + if (m_lookup_set.count(sc)) + return false; + } else if (m_symbol_contexts.size() < kSetThreshold) { + for (const auto &existing : m_symbol_contexts) { + if (SymbolContext::CompareConsideringPossiblyNullSymbol(existing, sc)) + return false; + } + } else { + // We've just crossed the threshold. Populate the set from existing items. + // Since the set won't be empty after this every new append operation will + // keep the set in sync with the vector. + for (const auto &existing : m_symbol_contexts) + m_lookup_set.insert(existing); + if (m_lookup_set.count(sc)) return false; } + + // This path is only taken when sc is an "isolated symbol" (only symbol field + // is set), so it's not the hot path. if (merge_symbol_into_function && sc.symbol != nullptr && sc.comp_unit == nullptr && sc.function == nullptr && sc.block == nullptr && !sc.line_entry.IsValid()) { if (sc.symbol->ValueIsAddress()) { - for (pos = m_symbol_contexts.begin(); pos != end; ++pos) { + for (auto &pos : m_symbol_contexts) { // Don't merge symbols into inlined function symbol contexts - if (pos->block && pos->block->GetContainingInlinedBlock()) + if (pos.block && pos.block->GetContainingInlinedBlock()) continue; - if (pos->function) { - if (pos->function->GetAddress() == sc.symbol->GetAddressRef()) { + if (pos.function) { + if (pos.function->GetAddress() == sc.symbol->GetAddressRef()) { // Do we already have a function with this symbol? - if (pos->symbol == sc.symbol) + if (pos.symbol == sc.symbol) return false; - if (pos->symbol == nullptr) { - pos->symbol = sc.symbol; + if (pos.symbol == nullptr) { + pos.symbol = sc.symbol; return false; } } @@ -1249,11 +1312,17 @@ bool SymbolContextList::AppendIfUnique(const SymbolContext &sc, } } } + m_symbol_contexts.push_back(sc); + if (!m_lookup_set.empty()) + m_lookup_set.insert(sc); return true; } -void SymbolContextList::Clear() { m_symbol_contexts.clear(); } +void SymbolContextList::Clear() { + m_symbol_contexts.clear(); + m_lookup_set.clear(); +} void SymbolContextList::Dump(Stream *s, Target *target) const { @@ -1281,6 +1350,8 @@ bool SymbolContextList::GetContextAtIndex(size_t idx, SymbolContext &sc) const { bool SymbolContextList::RemoveContextAtIndex(size_t idx) { if (idx < m_symbol_contexts.size()) { + if (!m_lookup_set.empty()) + m_lookup_set.erase(m_symbol_contexts[idx]); m_symbol_contexts.erase(m_symbol_contexts.begin() + idx); return true; } _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
