https://github.com/medismailben updated https://github.com/llvm/llvm-project/pull/195774
>From e43d5b49c369b25f301f80149c2b41fb35a63855 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani <[email protected]> Date: Wed, 6 May 2026 17:30:10 -0700 Subject: [PATCH] [lldb] Use Policy for ProcessRunLock re-entrancy When SB API code enters a frame provider's Python callback and the callback calls back into the SB API, the inner call tries to re-acquire the same ProcessRunLock read lock. On a write-preferring rwlock, a pending writer (e.g. the override PST calling SetStopped) blocks the re-entrant reader, causing a deadlock. Fix this by having ProcessRunLocker::TryLock push a policy with holds_run_lock=true after acquiring the real lock. Re-entrant ReadTryLock calls check this flag and skip the rwlock entirely. ProcessRunLocker::Unlock pops the policy before releasing the lock. This replaces the previous thread_local unordered_map workaround with the Policy framework. rdar://176223894 Signed-off-by: Med Ismail Bennani <[email protected]> --- lldb/include/lldb/Host/ProcessRunLock.h | 25 +--- lldb/include/lldb/Target/Policy.h | 1 + lldb/source/Host/common/ProcessRunLock.cpp | 38 ++++++ lldb/source/Target/Policy.cpp | 1 + .../runlock_reentrant_deadlock/Makefile | 3 + .../TestRunLockReentrantDeadlock.py | 125 ++++++++++++++++++ .../bkpt_resolver.py | 47 +++++++ .../frame_provider.py | 38 ++++++ .../runlock_reentrant_deadlock/main.c | 18 +++ lldb/unittests/Target/PolicyTest.cpp | 6 +- 10 files changed, 279 insertions(+), 23 deletions(-) create mode 100644 lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile create mode 100644 lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py create mode 100644 lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py create mode 100644 lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py create mode 100644 lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c diff --git a/lldb/include/lldb/Host/ProcessRunLock.h b/lldb/include/lldb/Host/ProcessRunLock.h index 27d10942ddb4c..049eb36deea27 100644 --- a/lldb/include/lldb/Host/ProcessRunLock.h +++ b/lldb/include/lldb/Host/ProcessRunLock.h @@ -58,29 +58,12 @@ class ProcessRunLock { bool IsLocked() const { return m_lock; } // Try to lock the read lock, but only do so if there are no writers. - bool TryLock(ProcessRunLock *lock) { - if (m_lock) { - if (m_lock == lock) - return true; // We already have this lock locked - else - Unlock(); - } - if (lock) { - if (lock->ReadTryLock()) { - m_lock = lock; - return true; - } - } - return false; - } + // Pushes a policy with holds_run_lock=true so that re-entrant + // ReadTryLock calls from the same thread skip the real lock. + bool TryLock(ProcessRunLock *lock); protected: - void Unlock() { - if (m_lock) { - m_lock->ReadUnlock(); - m_lock = nullptr; - } - } + void Unlock(); ProcessRunLock *m_lock = nullptr; diff --git a/lldb/include/lldb/Target/Policy.h b/lldb/include/lldb/Target/Policy.h index b398b6bfd650a..5a28d4dffdf8b 100644 --- a/lldb/include/lldb/Target/Policy.h +++ b/lldb/include/lldb/Target/Policy.h @@ -49,6 +49,7 @@ struct Policy { bool can_run_breakpoint_actions = true; bool can_load_frame_providers = true; bool can_run_frame_recognizers = true; + bool holds_run_lock = false; }; View view = View::Public; diff --git a/lldb/source/Host/common/ProcessRunLock.cpp b/lldb/source/Host/common/ProcessRunLock.cpp index 8e7ef45e1e350..fdf41d4e3a445 100644 --- a/lldb/source/Host/common/ProcessRunLock.cpp +++ b/lldb/source/Host/common/ProcessRunLock.cpp @@ -8,6 +8,7 @@ #ifndef _WIN32 #include "lldb/Host/ProcessRunLock.h" +#include "lldb/Target/Policy.h" namespace lldb_private { @@ -22,6 +23,10 @@ ProcessRunLock::~ProcessRunLock() { } bool ProcessRunLock::ReadTryLock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.capabilities.holds_run_lock) + return !m_running; + ::pthread_rwlock_rdlock(&m_rwlock); if (!m_running) { // coverity[missing_unlock] @@ -32,9 +37,42 @@ bool ProcessRunLock::ReadTryLock() { } bool ProcessRunLock::ReadUnlock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.capabilities.holds_run_lock) + return true; + return ::pthread_rwlock_unlock(&m_rwlock) == 0; } +bool ProcessRunLock::ProcessRunLocker::TryLock(ProcessRunLock *lock) { + if (m_lock) { + if (m_lock == lock) + return true; + Unlock(); + } + if (lock) { + if (lock->ReadTryLock()) { + m_lock = lock; + // Push a policy so re-entrant ReadTryLock calls from the same + // thread (e.g. provider Python calling back into SB API) skip + // the real lock and avoid deadlocking with a pending writer. + Policy policy = PolicyStack::Get().Current(); + policy.capabilities.holds_run_lock = true; + PolicyStack::Get().Push(policy); + return true; + } + } + return false; +} + +void ProcessRunLock::ProcessRunLocker::Unlock() { + if (m_lock) { + PolicyStack::Get().Pop(); + m_lock->ReadUnlock(); + m_lock = nullptr; + } +} + bool ProcessRunLock::SetRunning() { ::pthread_rwlock_wrlock(&m_rwlock); bool was_stopped = !m_running; diff --git a/lldb/source/Target/Policy.cpp b/lldb/source/Target/Policy.cpp index d49dffb79de01..d259ca6e08c7a 100644 --- a/lldb/source/Target/Policy.cpp +++ b/lldb/source/Target/Policy.cpp @@ -33,6 +33,7 @@ void Policy::Dump(Stream &s) const { s << " bp_actions=" << capabilities.can_run_breakpoint_actions; s << " frame_providers=" << capabilities.can_load_frame_providers; s << " frame_recognizers=" << capabilities.can_run_frame_recognizers; + s << " holds_run_lock=" << capabilities.holds_run_lock; s << '}'; } diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile new file mode 100644 index 0000000000000..0b710c6e298ae --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c +CFLAGS_EXTRAS := -std=c99 +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py new file mode 100644 index 0000000000000..2933c073ce8c9 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py @@ -0,0 +1,125 @@ +""" +Test that ProcessRunLock re-entrant read locks don't deadlock when a frame +provider's get_frame_at_index calls SB API methods. + +This reproduces the deadlock seen in lldb-rpc-server where: + + - An RPC client thread holds a ProcessRunLock read lock (from the outer + SB API call) and enters a provider's get_frame_at_index, which calls + SBFrame.IsValid -> GetStoppedExecutionContext -> ReadTryLock (re-entrant). + + - The override PST is exiting RunPrivateStateThread and calls SetStopped + -> pthread_rwlock_wrlock (blocked by client thread's read lock). + + - The client thread's re-entrant ReadTryLock blocks because the pending + writer prevents new readers on a write-preferring rwlock. + + - The original PST is blocked joining the override thread. +""" + +import os +import threading +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class TestRunLockReentrantDeadlock(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24528") + def test_runlock_reentrant_no_deadlock(self): + """ + Test that a frame provider calling SBFrame.IsValid from + get_frame_at_index does not deadlock with the override PST's + SetStopped when another thread triggers EvaluateExpression. + """ + self.build() + + target, process, thread, _ = lldbutil.run_to_name_breakpoint(self, "main") + + resolver_path = os.path.join(self.getSourceDir(), "bkpt_resolver.py") + provider_path = os.path.join(self.getSourceDir(), "frame_provider.py") + self.runCmd("command script import " + resolver_path) + self.runCmd("command script import " + provider_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "frame_provider.SBAPIAccessInGetFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue( + error.Success(), + f"Should register frame provider: {error}", + ) + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + extra_args = lldb.SBStructuredData() + stream = lldb.SBStream() + stream.Print('{"symbol": "target_func"}') + extra_args.SetFromJSON(stream) + + bkpt = target.BreakpointCreateFromScript( + "bkpt_resolver.ExprEvalResolver", + extra_args, + lldb.SBFileSpecList(), + lldb.SBFileSpecList(), + ) + self.assertTrue(bkpt.IsValid(), "Scripted breakpoint should be valid") + self.assertGreater( + bkpt.GetNumLocations(), 0, "Breakpoint should have locations" + ) + + # Spawn a thread that repeatedly accesses frames through the provider. + # This simulates an RPC client thread that holds the ProcessRunLock + # read lock and enters get_frame_at_index -> SBFrame.IsValid. + stop_frame_access = threading.Event() + frame_access_error = [None] + + def access_frames(): + try: + while not stop_frame_access.is_set(): + t = process.GetSelectedThread() + if t.IsValid(): + for i in range(t.GetNumFrames()): + f = t.GetFrameAtIndex(i) + f.IsValid() + except Exception as e: + frame_access_error[0] = str(e) + + frame_thread = threading.Thread(target=access_frames, daemon=True) + frame_thread.start() + + # Continue into the breakpoint. was_hit calls EvaluateExpression on + # the PST, which triggers RunThreadPlan -> override PST. + # The frame access thread is concurrently calling GetFrameAtIndex -> + # provider get_frame_at_index -> SBFrame.IsValid. + # Without the fix, this deadlocks. + process.Continue() + self.assertState(process.GetState(), lldb.eStateStopped) + + stop_frame_access.set() + frame_thread.join(timeout=5) + self.assertFalse(frame_thread.is_alive(), "Frame access thread should exit") + self.assertIsNone( + frame_access_error[0], + f"Frame access thread hit an error: {frame_access_error[0]}", + ) + + thread = process.GetSelectedThread() + self.assertTrue(thread.IsValid(), "Thread should be valid") + self.assertEqual( + thread.GetStopReason(), + lldb.eStopReasonBreakpoint, + "Should stop at breakpoint", + ) + + g_value = target.FindFirstGlobalVariable("g_value") + self.assertTrue(g_value.IsValid(), "Should find g_value") + self.assertGreater( + g_value.GetValueAsUnsigned(), + 0, + "g_value should have been incremented by the was_hit callback", + ) diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py new file mode 100644 index 0000000000000..7fb7f8675f2a6 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py @@ -0,0 +1,47 @@ +""" +Scripted breakpoint resolver whose was_hit callback calls EvaluateExpression. + +This reproduces the deadlock seen in the sample where: +1. BreakpointResolverScripted::WasHit runs on the private state thread +2. was_hit calls SBFrame.EvaluateExpression -> RunThreadPlan -> Halt -> + WaitForProcessToStop (holds a mutex, waits for state event) +3. The override private state thread handles the stop and loads a scripted + frame provider +4. The provider's __init__ calls SBThread.__bool__ -> GetStoppedExecutionContext + -> tries to acquire the same mutex -> DEADLOCK +""" + +import lldb + + +class ExprEvalResolver: + """Scripted breakpoint resolver that evaluates an expression in was_hit.""" + + def __init__(self, bkpt, extra_args, dict): + self.bkpt = bkpt + sym_name = extra_args.GetValueForKey("symbol").GetStringValue(100) + self.sym_name = sym_name + self.facade_loc = None + + def __callback__(self, sym_ctx): + sym = sym_ctx.module.FindSymbol(self.sym_name, lldb.eSymbolTypeCode) + if sym.IsValid(): + self.bkpt.AddLocation(sym.GetStartAddress()) + self.facade_loc = self.bkpt.AddFacadeLocation() + + def get_short_help(self): + return f"ExprEvalResolver for {self.sym_name}" + + def was_hit(self, frame, bp_loc): + # This runs on the private state thread. Calling EvaluateExpression + # here triggers RunThreadPlan -> Halt -> WaitForProcessToStop, which + # holds a mutex and waits for a state change event. + options = lldb.SBExpressionOptions() + options.SetStopOthers(True) + options.SetTryAllThreads(False) + + result = frame.EvaluateExpression("increment()", options) + if not result.error.success: + return lldb.LLDB_INVALID_BREAK_ID + + return self.facade_loc diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py new file mode 100644 index 0000000000000..413484c264513 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py @@ -0,0 +1,38 @@ +""" +Frame provider whose get_frame_at_index calls SBFrame::IsValid on input frames. + +When a client thread accesses frames through this provider while the PST is +mid-expression-eval (RunThreadPlan), the following deadlock occurs: + + - Client thread: holds ProcessRunLock read lock (from outer SB API call), + enters get_frame_at_index, calls SBFrame.IsValid -> + GetStoppedExecutionContext -> ReadTryLock (re-entrant, blocked by + pending writer) + + - Override PST: finishing RunPrivateStateThread, calls SetStopped -> + pthread_rwlock_wrlock (blocked by client thread's read lock) + +The re-entrant read lock blocks because macOS uses a write-preferring rwlock. +""" + +import lldb +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class SBAPIAccessInGetFrameProvider(ScriptedFrameProvider): + """Provider that calls SBFrame.IsValid from get_frame_at_index.""" + + @staticmethod + def get_description(): + return "Provider that accesses SB API in get_frame_at_index" + + def get_frame_at_index(self, idx): + if idx < len(self.input_frames): + frame = self.input_frames.GetFrameAtIndex(idx) + # This call triggers GetStoppedExecutionContext -> + # ProcessRunLock::ReadTryLock. If the current thread already + # holds the read lock (from the outer SB API entry point), + # and a writer is pending, this re-entrant read lock blocks. + frame.IsValid() + return idx + return None diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c new file mode 100644 index 0000000000000..9ebeaf3380ed9 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c @@ -0,0 +1,18 @@ +#include <stdio.h> + +int g_value = 0; + +int increment() { return ++g_value; } + +int target_func() { + printf("target_func: %d\n", g_value); + return g_value; +} + +int main() { + for (int i = 0; i < 10; i++) { + target_func(); + } + increment(); + return 0; +} diff --git a/lldb/unittests/Target/PolicyTest.cpp b/lldb/unittests/Target/PolicyTest.cpp index 757879495a05a..671c5aae69cdc 100644 --- a/lldb/unittests/Target/PolicyTest.cpp +++ b/lldb/unittests/Target/PolicyTest.cpp @@ -126,7 +126,8 @@ TEST(PolicyTest, DumpPublicState) { EXPECT_EQ(s.GetString(), "policy: view=public, capabilities={" "eval_expr=true run_all=true try_all=true " - "bp_actions=true frame_providers=true frame_recognizers=true}"); + "bp_actions=true frame_providers=true frame_recognizers=true " + "holds_run_lock=false}"); } TEST(PolicyTest, DumpPrivateState) { @@ -135,7 +136,8 @@ TEST(PolicyTest, DumpPrivateState) { EXPECT_EQ(s.GetString(), "policy: view=private, capabilities={" "eval_expr=true run_all=true try_all=true " - "bp_actions=true frame_providers=false frame_recognizers=false}"); + "bp_actions=true frame_providers=false frame_recognizers=false " + "holds_run_lock=false}"); } TEST(PolicyTest, DumpStack) { _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
