https://github.com/medismailben updated 
https://github.com/llvm/llvm-project/pull/195774

>From e43d5b49c369b25f301f80149c2b41fb35a63855 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <[email protected]>
Date: Wed, 6 May 2026 17:30:10 -0700
Subject: [PATCH] [lldb] Use Policy for ProcessRunLock re-entrancy

When SB API code enters a frame provider's Python callback and the
callback calls back into the SB API, the inner call tries to
re-acquire the same ProcessRunLock read lock. On a write-preferring
rwlock, a pending writer (e.g. the override PST calling SetStopped)
blocks the re-entrant reader, causing a deadlock.

Fix this by having ProcessRunLocker::TryLock push a policy with
holds_run_lock=true after acquiring the real lock. Re-entrant
ReadTryLock calls check this flag and skip the rwlock entirely.
ProcessRunLocker::Unlock pops the policy before releasing the lock.

This replaces the previous thread_local unordered_map workaround
with the Policy framework.

rdar://176223894

Signed-off-by: Med Ismail Bennani <[email protected]>
---
 lldb/include/lldb/Host/ProcessRunLock.h       |  25 +---
 lldb/include/lldb/Target/Policy.h             |   1 +
 lldb/source/Host/common/ProcessRunLock.cpp    |  38 ++++++
 lldb/source/Target/Policy.cpp                 |   1 +
 .../runlock_reentrant_deadlock/Makefile       |   3 +
 .../TestRunLockReentrantDeadlock.py           | 125 ++++++++++++++++++
 .../bkpt_resolver.py                          |  47 +++++++
 .../frame_provider.py                         |  38 ++++++
 .../runlock_reentrant_deadlock/main.c         |  18 +++
 lldb/unittests/Target/PolicyTest.cpp          |   6 +-
 10 files changed, 279 insertions(+), 23 deletions(-)
 create mode 100644 
lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile
 create mode 100644 
lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py
 create mode 100644 
lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py
 create mode 100644 
lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py
 create mode 100644 
lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c

diff --git a/lldb/include/lldb/Host/ProcessRunLock.h 
b/lldb/include/lldb/Host/ProcessRunLock.h
index 27d10942ddb4c..049eb36deea27 100644
--- a/lldb/include/lldb/Host/ProcessRunLock.h
+++ b/lldb/include/lldb/Host/ProcessRunLock.h
@@ -58,29 +58,12 @@ class ProcessRunLock {
     bool IsLocked() const { return m_lock; }
 
     // Try to lock the read lock, but only do so if there are no writers.
-    bool TryLock(ProcessRunLock *lock) {
-      if (m_lock) {
-        if (m_lock == lock)
-          return true; // We already have this lock locked
-        else
-          Unlock();
-      }
-      if (lock) {
-        if (lock->ReadTryLock()) {
-          m_lock = lock;
-          return true;
-        }
-      }
-      return false;
-    }
+    // Pushes a policy with holds_run_lock=true so that re-entrant
+    // ReadTryLock calls from the same thread skip the real lock.
+    bool TryLock(ProcessRunLock *lock);
 
   protected:
-    void Unlock() {
-      if (m_lock) {
-        m_lock->ReadUnlock();
-        m_lock = nullptr;
-      }
-    }
+    void Unlock();
 
     ProcessRunLock *m_lock = nullptr;
 
diff --git a/lldb/include/lldb/Target/Policy.h 
b/lldb/include/lldb/Target/Policy.h
index b398b6bfd650a..5a28d4dffdf8b 100644
--- a/lldb/include/lldb/Target/Policy.h
+++ b/lldb/include/lldb/Target/Policy.h
@@ -49,6 +49,7 @@ struct Policy {
     bool can_run_breakpoint_actions = true;
     bool can_load_frame_providers = true;
     bool can_run_frame_recognizers = true;
+    bool holds_run_lock = false;
   };
 
   View view = View::Public;
diff --git a/lldb/source/Host/common/ProcessRunLock.cpp 
b/lldb/source/Host/common/ProcessRunLock.cpp
index 8e7ef45e1e350..fdf41d4e3a445 100644
--- a/lldb/source/Host/common/ProcessRunLock.cpp
+++ b/lldb/source/Host/common/ProcessRunLock.cpp
@@ -8,6 +8,7 @@
 
 #ifndef _WIN32
 #include "lldb/Host/ProcessRunLock.h"
+#include "lldb/Target/Policy.h"
 
 namespace lldb_private {
 
@@ -22,6 +23,10 @@ ProcessRunLock::~ProcessRunLock() {
 }
 
 bool ProcessRunLock::ReadTryLock() {
+  Policy policy = PolicyStack::Get().Current();
+  if (policy.capabilities.holds_run_lock)
+    return !m_running;
+
   ::pthread_rwlock_rdlock(&m_rwlock);
   if (!m_running) {
     // coverity[missing_unlock]
@@ -32,9 +37,42 @@ bool ProcessRunLock::ReadTryLock() {
 }
 
 bool ProcessRunLock::ReadUnlock() {
+  Policy policy = PolicyStack::Get().Current();
+  if (policy.capabilities.holds_run_lock)
+    return true;
+
   return ::pthread_rwlock_unlock(&m_rwlock) == 0;
 }
 
+bool ProcessRunLock::ProcessRunLocker::TryLock(ProcessRunLock *lock) {
+  if (m_lock) {
+    if (m_lock == lock)
+      return true;
+    Unlock();
+  }
+  if (lock) {
+    if (lock->ReadTryLock()) {
+      m_lock = lock;
+      // Push a policy so re-entrant ReadTryLock calls from the same
+      // thread (e.g. provider Python calling back into SB API) skip
+      // the real lock and avoid deadlocking with a pending writer.
+      Policy policy = PolicyStack::Get().Current();
+      policy.capabilities.holds_run_lock = true;
+      PolicyStack::Get().Push(policy);
+      return true;
+    }
+  }
+  return false;
+}
+
+void ProcessRunLock::ProcessRunLocker::Unlock() {
+  if (m_lock) {
+    PolicyStack::Get().Pop();
+    m_lock->ReadUnlock();
+    m_lock = nullptr;
+  }
+}
+
 bool ProcessRunLock::SetRunning() {
   ::pthread_rwlock_wrlock(&m_rwlock);
   bool was_stopped = !m_running;
diff --git a/lldb/source/Target/Policy.cpp b/lldb/source/Target/Policy.cpp
index d49dffb79de01..d259ca6e08c7a 100644
--- a/lldb/source/Target/Policy.cpp
+++ b/lldb/source/Target/Policy.cpp
@@ -33,6 +33,7 @@ void Policy::Dump(Stream &s) const {
   s << " bp_actions=" << capabilities.can_run_breakpoint_actions;
   s << " frame_providers=" << capabilities.can_load_frame_providers;
   s << " frame_recognizers=" << capabilities.can_run_frame_recognizers;
+  s << " holds_run_lock=" << capabilities.holds_run_lock;
   s << '}';
 }
 
diff --git 
a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile
 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile
new file mode 100644
index 0000000000000..0b710c6e298ae
--- /dev/null
+++ 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+CFLAGS_EXTRAS := -std=c99
+include Makefile.rules
diff --git 
a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py
 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py
new file mode 100644
index 0000000000000..2933c073ce8c9
--- /dev/null
+++ 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py
@@ -0,0 +1,125 @@
+"""
+Test that ProcessRunLock re-entrant read locks don't deadlock when a frame
+provider's get_frame_at_index calls SB API methods.
+
+This reproduces the deadlock seen in lldb-rpc-server where:
+
+  - An RPC client thread holds a ProcessRunLock read lock (from the outer
+    SB API call) and enters a provider's get_frame_at_index, which calls
+    SBFrame.IsValid -> GetStoppedExecutionContext -> ReadTryLock (re-entrant).
+
+  - The override PST is exiting RunPrivateStateThread and calls SetStopped
+    -> pthread_rwlock_wrlock (blocked by client thread's read lock).
+
+  - The client thread's re-entrant ReadTryLock blocks because the pending
+    writer prevents new readers on a write-preferring rwlock.
+
+  - The original PST is blocked joining the override thread.
+"""
+
+import os
+import threading
+import lldb
+import lldbsuite.test.lldbutil as lldbutil
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+
+
+class TestRunLockReentrantDeadlock(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24528")
+    def test_runlock_reentrant_no_deadlock(self):
+        """
+        Test that a frame provider calling SBFrame.IsValid from
+        get_frame_at_index does not deadlock with the override PST's
+        SetStopped when another thread triggers EvaluateExpression.
+        """
+        self.build()
+
+        target, process, thread, _ = lldbutil.run_to_name_breakpoint(self, 
"main")
+
+        resolver_path = os.path.join(self.getSourceDir(), "bkpt_resolver.py")
+        provider_path = os.path.join(self.getSourceDir(), "frame_provider.py")
+        self.runCmd("command script import " + resolver_path)
+        self.runCmd("command script import " + provider_path)
+
+        error = lldb.SBError()
+        provider_id = target.RegisterScriptedFrameProvider(
+            "frame_provider.SBAPIAccessInGetFrameProvider",
+            lldb.SBStructuredData(),
+            error,
+        )
+        self.assertTrue(
+            error.Success(),
+            f"Should register frame provider: {error}",
+        )
+        self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero")
+
+        extra_args = lldb.SBStructuredData()
+        stream = lldb.SBStream()
+        stream.Print('{"symbol": "target_func"}')
+        extra_args.SetFromJSON(stream)
+
+        bkpt = target.BreakpointCreateFromScript(
+            "bkpt_resolver.ExprEvalResolver",
+            extra_args,
+            lldb.SBFileSpecList(),
+            lldb.SBFileSpecList(),
+        )
+        self.assertTrue(bkpt.IsValid(), "Scripted breakpoint should be valid")
+        self.assertGreater(
+            bkpt.GetNumLocations(), 0, "Breakpoint should have locations"
+        )
+
+        # Spawn a thread that repeatedly accesses frames through the provider.
+        # This simulates an RPC client thread that holds the ProcessRunLock
+        # read lock and enters get_frame_at_index -> SBFrame.IsValid.
+        stop_frame_access = threading.Event()
+        frame_access_error = [None]
+
+        def access_frames():
+            try:
+                while not stop_frame_access.is_set():
+                    t = process.GetSelectedThread()
+                    if t.IsValid():
+                        for i in range(t.GetNumFrames()):
+                            f = t.GetFrameAtIndex(i)
+                            f.IsValid()
+            except Exception as e:
+                frame_access_error[0] = str(e)
+
+        frame_thread = threading.Thread(target=access_frames, daemon=True)
+        frame_thread.start()
+
+        # Continue into the breakpoint. was_hit calls EvaluateExpression on
+        # the PST, which triggers RunThreadPlan -> override PST.
+        # The frame access thread is concurrently calling GetFrameAtIndex ->
+        # provider get_frame_at_index -> SBFrame.IsValid.
+        # Without the fix, this deadlocks.
+        process.Continue()
+        self.assertState(process.GetState(), lldb.eStateStopped)
+
+        stop_frame_access.set()
+        frame_thread.join(timeout=5)
+        self.assertFalse(frame_thread.is_alive(), "Frame access thread should 
exit")
+        self.assertIsNone(
+            frame_access_error[0],
+            f"Frame access thread hit an error: {frame_access_error[0]}",
+        )
+
+        thread = process.GetSelectedThread()
+        self.assertTrue(thread.IsValid(), "Thread should be valid")
+        self.assertEqual(
+            thread.GetStopReason(),
+            lldb.eStopReasonBreakpoint,
+            "Should stop at breakpoint",
+        )
+
+        g_value = target.FindFirstGlobalVariable("g_value")
+        self.assertTrue(g_value.IsValid(), "Should find g_value")
+        self.assertGreater(
+            g_value.GetValueAsUnsigned(),
+            0,
+            "g_value should have been incremented by the was_hit callback",
+        )
diff --git 
a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py
 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py
new file mode 100644
index 0000000000000..7fb7f8675f2a6
--- /dev/null
+++ 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py
@@ -0,0 +1,47 @@
+"""
+Scripted breakpoint resolver whose was_hit callback calls EvaluateExpression.
+
+This reproduces the deadlock seen in the sample where:
+1. BreakpointResolverScripted::WasHit runs on the private state thread
+2. was_hit calls SBFrame.EvaluateExpression -> RunThreadPlan -> Halt ->
+   WaitForProcessToStop (holds a mutex, waits for state event)
+3. The override private state thread handles the stop and loads a scripted
+   frame provider
+4. The provider's __init__ calls SBThread.__bool__ -> 
GetStoppedExecutionContext
+   -> tries to acquire the same mutex -> DEADLOCK
+"""
+
+import lldb
+
+
+class ExprEvalResolver:
+    """Scripted breakpoint resolver that evaluates an expression in was_hit."""
+
+    def __init__(self, bkpt, extra_args, dict):
+        self.bkpt = bkpt
+        sym_name = extra_args.GetValueForKey("symbol").GetStringValue(100)
+        self.sym_name = sym_name
+        self.facade_loc = None
+
+    def __callback__(self, sym_ctx):
+        sym = sym_ctx.module.FindSymbol(self.sym_name, lldb.eSymbolTypeCode)
+        if sym.IsValid():
+            self.bkpt.AddLocation(sym.GetStartAddress())
+            self.facade_loc = self.bkpt.AddFacadeLocation()
+
+    def get_short_help(self):
+        return f"ExprEvalResolver for {self.sym_name}"
+
+    def was_hit(self, frame, bp_loc):
+        # This runs on the private state thread. Calling EvaluateExpression
+        # here triggers RunThreadPlan -> Halt -> WaitForProcessToStop, which
+        # holds a mutex and waits for a state change event.
+        options = lldb.SBExpressionOptions()
+        options.SetStopOthers(True)
+        options.SetTryAllThreads(False)
+
+        result = frame.EvaluateExpression("increment()", options)
+        if not result.error.success:
+            return lldb.LLDB_INVALID_BREAK_ID
+
+        return self.facade_loc
diff --git 
a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py
 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py
new file mode 100644
index 0000000000000..413484c264513
--- /dev/null
+++ 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py
@@ -0,0 +1,38 @@
+"""
+Frame provider whose get_frame_at_index calls SBFrame::IsValid on input frames.
+
+When a client thread accesses frames through this provider while the PST is
+mid-expression-eval (RunThreadPlan), the following deadlock occurs:
+
+  - Client thread: holds ProcessRunLock read lock (from outer SB API call),
+    enters get_frame_at_index, calls SBFrame.IsValid ->
+    GetStoppedExecutionContext -> ReadTryLock (re-entrant, blocked by
+    pending writer)
+
+  - Override PST: finishing RunPrivateStateThread, calls SetStopped ->
+    pthread_rwlock_wrlock (blocked by client thread's read lock)
+
+The re-entrant read lock blocks because macOS uses a write-preferring rwlock.
+"""
+
+import lldb
+from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider
+
+
+class SBAPIAccessInGetFrameProvider(ScriptedFrameProvider):
+    """Provider that calls SBFrame.IsValid from get_frame_at_index."""
+
+    @staticmethod
+    def get_description():
+        return "Provider that accesses SB API in get_frame_at_index"
+
+    def get_frame_at_index(self, idx):
+        if idx < len(self.input_frames):
+            frame = self.input_frames.GetFrameAtIndex(idx)
+            # This call triggers GetStoppedExecutionContext ->
+            # ProcessRunLock::ReadTryLock. If the current thread already
+            # holds the read lock (from the outer SB API entry point),
+            # and a writer is pending, this re-entrant read lock blocks.
+            frame.IsValid()
+            return idx
+        return None
diff --git 
a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c
 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c
new file mode 100644
index 0000000000000..9ebeaf3380ed9
--- /dev/null
+++ 
b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+
+int g_value = 0;
+
+int increment() { return ++g_value; }
+
+int target_func() {
+  printf("target_func: %d\n", g_value);
+  return g_value;
+}
+
+int main() {
+  for (int i = 0; i < 10; i++) {
+    target_func();
+  }
+  increment();
+  return 0;
+}
diff --git a/lldb/unittests/Target/PolicyTest.cpp 
b/lldb/unittests/Target/PolicyTest.cpp
index 757879495a05a..671c5aae69cdc 100644
--- a/lldb/unittests/Target/PolicyTest.cpp
+++ b/lldb/unittests/Target/PolicyTest.cpp
@@ -126,7 +126,8 @@ TEST(PolicyTest, DumpPublicState) {
   EXPECT_EQ(s.GetString(),
             "policy: view=public, capabilities={"
             "eval_expr=true run_all=true try_all=true "
-            "bp_actions=true frame_providers=true frame_recognizers=true}");
+            "bp_actions=true frame_providers=true frame_recognizers=true "
+            "holds_run_lock=false}");
 }
 
 TEST(PolicyTest, DumpPrivateState) {
@@ -135,7 +136,8 @@ TEST(PolicyTest, DumpPrivateState) {
   EXPECT_EQ(s.GetString(),
             "policy: view=private, capabilities={"
             "eval_expr=true run_all=true try_all=true "
-            "bp_actions=true frame_providers=false frame_recognizers=false}");
+            "bp_actions=true frame_providers=false frame_recognizers=false "
+            "holds_run_lock=false}");
 }
 
 TEST(PolicyTest, DumpStack) {

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to