llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: Augusto Noronha (augusto2112)

<details>
<summary>Changes</summary>

d7fb086668dff68 changed some calls from SymbolContextList::Append to 
SymbolContextList::AppendIfUnique. This has unfortunately caused a huge slow 
down in operations involving a large amount of symbol contexts (for example, 
trying to autocomplete from an empty input "b &lt;TAB&gt;" will add every 
function to the list), since AppendIfUnique scans the entire symbol context 
list. Speed this up by adding a hash set to quickly answer whether a symbol 
context is on the list or not.

This takes the time from running "b &lt;TAB&gt;" when debugging yaml2obj on my 
machine from 600 seconds down to 13, which is about the same as before 
d7fb086668dff68.

Note that AppendIfUnique has a logic error, which has been present since its 
introduction. This has to do with the behavior controlled by 
"merge_symbol_into_function", which will try to merge symbols with symbol 
context containing the equivalent function to that symbol.

The previous patch tried to correct this by adding 
CompareConsideringPossiblyNullSymbol(), which is not quite correct.

With CompareConsideringPossiblyNullSymbol(), if symbols are added in this order:

- Symbol context without symbol.
- Equivalent symbol context with symbol.

The list will have only one symbol context WITHOUT the symbol.

If we stop using CompareConsideringPossiblyNullSymbol() and instead go back to 
the == operator which d7fb086668dff68 introduced, with symbols added in this 
order, the following will happen:

- Symbol context without symbol.
- Equivalent symbol context with symbol.
- The bare symbol, with "merge_symbol_into_function = true", the list will have 
the same symbol twice.

This patch does not attempt to solve this, and instead focuses on the 
performance issue d7fb086668dff68 introduced.

rdar://170477680

---
Full diff: https://github.com/llvm/llvm-project/pull/181952.diff


2 Files Affected:

- (modified) lldb/include/lldb/Symbol/SymbolContext.h (+15) 
- (modified) lldb/source/Symbol/SymbolContext.cpp (+88-17) 


``````````diff
diff --git a/lldb/include/lldb/Symbol/SymbolContext.h 
b/lldb/include/lldb/Symbol/SymbolContext.h
index 0834825cdbd25..f63d23eaefae3 100644
--- a/lldb/include/lldb/Symbol/SymbolContext.h
+++ b/lldb/include/lldb/Symbol/SymbolContext.h
@@ -19,6 +19,7 @@
 #include "lldb/Utility/Iterable.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/lldb-private.h"
+#include "llvm/ADT/DenseSet.h"
 
 namespace lldb_private {
 
@@ -483,6 +484,20 @@ class SymbolContextList {
   // Member variables.
   collection m_symbol_contexts; ///< The list of symbol contexts.
 
+private:
+  struct SymbolContextInfo {
+    static SymbolContext getEmptyKey();
+    static SymbolContext getTombstoneKey();
+    static unsigned getHashValue(const SymbolContext &sc);
+    static bool isEqual(const SymbolContext &lhs, const SymbolContext &rhs);
+  };
+
+  /// Threshold above which we switch from linear scan to hash-set lookup
+  /// for uniqueness checks.
+  static constexpr size_t kSetThreshold = 1024;
+
+  llvm::SmallDenseSet<SymbolContext, 1, SymbolContextInfo> m_lookup_set;
+
 public:
   const_iterator begin() const { return m_symbol_contexts.begin(); }
   const_iterator end() const { return m_symbol_contexts.end(); }
diff --git a/lldb/source/Symbol/SymbolContext.cpp 
b/lldb/source/Symbol/SymbolContext.cpp
index ead924afa9fc2..637765b775acf 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -28,6 +28,8 @@
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/StreamString.h"
 #include "lldb/lldb-enumerations.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -1191,18 +1193,65 @@ void SymbolContextSpecifier::GetDescription(
 //  SymbolContextList
 //
 
+// SymbolContextInfo implementations for DenseSet
+SymbolContext SymbolContextList::SymbolContextInfo::getEmptyKey() {
+  SymbolContext sc;
+  sc.function = llvm::DenseMapInfo<Function *>::getEmptyKey();
+  return sc;
+}
+
+SymbolContext SymbolContextList::SymbolContextInfo::getTombstoneKey() {
+  SymbolContext sc;
+  sc.function = llvm::DenseMapInfo<Function *>::getTombstoneKey();
+  return sc;
+}
+
+unsigned
+SymbolContextList::SymbolContextInfo::getHashValue(const SymbolContext &sc) {
+  // Hash all fields EXCEPT symbol, since CompareConsideringPossiblyNullSymbol
+  // ignores them.
+  auto line_entry_hash = sc.line_entry.IsValid()
+                             ? llvm::hash_combine(
+                                   
sc.line_entry.range.GetBaseAddress().GetFileAddress(),
+                                   sc.line_entry.range.GetByteSize(),
+                                   sc.line_entry.is_terminal_entry,
+                                   sc.line_entry.line,
+                                   sc.line_entry.column)
+                             : llvm::hash_value(0);
+  return static_cast<unsigned>(llvm::hash_combine(
+      sc.function, sc.module_sp.get(), sc.comp_unit, sc.target_sp.get(),
+      line_entry_hash, sc.variable, sc.block));
+}
+
+bool SymbolContextList::SymbolContextInfo::isEqual(const SymbolContext &lhs,
+                                                   const SymbolContext &rhs) {
+  // Check for empty/tombstone keys first, since these are invalid pointers we 
don't 
+  // want to accidentally dereference them in 
CompareConsideringPossiblyNullSymbol.
+  if (lhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() ||
+      rhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() ||
+      lhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey() ||
+      rhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey())
+    return lhs.function == rhs.function;
+
+  return SymbolContext::CompareConsideringPossiblyNullSymbol(lhs, rhs);
+}
+
 SymbolContextList::SymbolContextList() : m_symbol_contexts() {}
 
 SymbolContextList::~SymbolContextList() = default;
 
 void SymbolContextList::Append(const SymbolContext &sc) {
   m_symbol_contexts.push_back(sc);
+  if (!m_lookup_set.empty())
+    m_lookup_set.insert(sc);
 }
 
 void SymbolContextList::Append(const SymbolContextList &sc_list) {
-  collection::const_iterator pos, end = sc_list.m_symbol_contexts.end();
-  for (pos = sc_list.m_symbol_contexts.begin(); pos != end; ++pos)
-    m_symbol_contexts.push_back(*pos);
+  for (const auto &sc : sc_list.m_symbol_contexts) {
+    m_symbol_contexts.push_back(sc);
+    if (!m_lookup_set.empty())
+      m_lookup_set.insert(sc);
+  }
 }
 
 uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list,
@@ -1218,30 +1267,44 @@ uint32_t SymbolContextList::AppendIfUnique(const 
SymbolContextList &sc_list,
 
 bool SymbolContextList::AppendIfUnique(const SymbolContext &sc,
                                        bool merge_symbol_into_function) {
-  collection::iterator pos, end = m_symbol_contexts.end();
-  for (pos = m_symbol_contexts.begin(); pos != end; ++pos) {
-    // Because symbol contexts might first be built without the symbol,
-    // which is then appended later on, compare the symbol contexts taking into
-    // accout that one (or either) of them might not have a symbol yet.
-    if (SymbolContext::CompareConsideringPossiblyNullSymbol(*pos, sc))
+
+  // Look in the set if it's populated, otherwise, fall back to linear scan.
+  if (!m_lookup_set.empty()) {
+    if (m_lookup_set.count(sc))
+      return false;
+  } else if (m_symbol_contexts.size() < kSetThreshold) {
+    for (const auto &existing : m_symbol_contexts) {
+      if (SymbolContext::CompareConsideringPossiblyNullSymbol(existing, sc))
+        return false;
+    }
+  } else {
+    // We've just crossed the threshold. Populate the set from existing items.
+    // Since the set won't be empty after this every new append operation will 
+    // keep the set in sync with the vector.
+    for (const auto &existing : m_symbol_contexts)
+      m_lookup_set.insert(existing);
+    if (m_lookup_set.count(sc))
       return false;
   }
+
+  // This path is only taken when sc is an "isolated symbol" (only symbol field
+  // is set), so it's not the hot path.
   if (merge_symbol_into_function && sc.symbol != nullptr &&
       sc.comp_unit == nullptr && sc.function == nullptr &&
       sc.block == nullptr && !sc.line_entry.IsValid()) {
     if (sc.symbol->ValueIsAddress()) {
-      for (pos = m_symbol_contexts.begin(); pos != end; ++pos) {
+      for (auto &pos : m_symbol_contexts) {
         // Don't merge symbols into inlined function symbol contexts
-        if (pos->block && pos->block->GetContainingInlinedBlock())
+        if (pos.block && pos.block->GetContainingInlinedBlock())
           continue;
 
-        if (pos->function) {
-          if (pos->function->GetAddress() == sc.symbol->GetAddressRef()) {
+        if (pos.function) {
+          if (pos.function->GetAddress() == sc.symbol->GetAddressRef()) {
             // Do we already have a function with this symbol?
-            if (pos->symbol == sc.symbol)
+            if (pos.symbol == sc.symbol)
               return false;
-            if (pos->symbol == nullptr) {
-              pos->symbol = sc.symbol;
+            if (pos.symbol == nullptr) {
+              pos.symbol = sc.symbol;
               return false;
             }
           }
@@ -1249,11 +1312,17 @@ bool SymbolContextList::AppendIfUnique(const 
SymbolContext &sc,
       }
     }
   }
+
   m_symbol_contexts.push_back(sc);
+  if (!m_lookup_set.empty())
+    m_lookup_set.insert(sc);
   return true;
 }
 
-void SymbolContextList::Clear() { m_symbol_contexts.clear(); }
+void SymbolContextList::Clear() {
+  m_symbol_contexts.clear();
+  m_lookup_set.clear();
+}
 
 void SymbolContextList::Dump(Stream *s, Target *target) const {
 
@@ -1281,6 +1350,8 @@ bool SymbolContextList::GetContextAtIndex(size_t idx, 
SymbolContext &sc) const {
 
 bool SymbolContextList::RemoveContextAtIndex(size_t idx) {
   if (idx < m_symbol_contexts.size()) {
+    if (!m_lookup_set.empty())
+      m_lookup_set.erase(m_symbol_contexts[idx]);
     m_symbol_contexts.erase(m_symbol_contexts.begin() + idx);
     return true;
   }

``````````

</details>


https://github.com/llvm/llvm-project/pull/181952
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to