https://github.com/python/cpython/commit/0fdf6a9a71f9b3c9adc0406542902e028431e7ca
commit: 0fdf6a9a71f9b3c9adc0406542902e028431e7ca
branch: main
author: Sam Gross <[email protected]>
committer: colesbury <[email protected]>
date: 2026-02-06T15:14:08Z
summary:

gh-144513: Skip critical section locking during stop-the-world (gh-144524)

When the interpreter is in a stop-the-world pause, critical sections
don't need to acquire locks since no other threads can be running.
This avoids a potential deadlock where lock fairness hands off ownership
to a thread that has already suspended for stop-the-world.

files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst
M Modules/_testinternalcapi/test_critical_sections.c
M Python/critical_section.c

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst
new file mode 100644
index 00000000000000..f97160172735e1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst
@@ -0,0 +1,2 @@
+Fix potential deadlock when using critical sections during stop-the-world
+pauses in the free-threaded build.
diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c
index e3b2fe716d48d3..72a1fa2cdc7224 100644
--- a/Modules/_testinternalcapi/test_critical_sections.c
+++ b/Modules/_testinternalcapi/test_critical_sections.c
@@ -4,6 +4,8 @@
 
 #include "parts.h"
 #include "pycore_critical_section.h"
+#include "pycore_pystate.h"
+#include "pycore_pythread.h"
 
 #ifdef MS_WINDOWS
 #  include <windows.h>            // Sleep()
@@ -381,6 +383,87 @@ test_critical_section2_reacquisition(PyObject *self, PyObject *Py_UNUSED(args))
 
 #endif // Py_GIL_DISABLED
 
+#ifdef Py_CAN_START_THREADS
+
+// gh-144513: Test that critical sections don't deadlock with stop-the-world.
+// This test is designed to deadlock (timeout) on builds without the fix.
+struct test_data_stw {
+    PyObject *obj;  // object whose critical section the workers and the test enter
+    Py_ssize_t num_threads;  // number of worker threads the test starts
+    Py_ssize_t started;  // count of workers that have started; updated atomically
+    PyEvent ready;  // notified by a worker once all workers have started
+};
+
+static void
+thread_stw(void *arg)
+{
+    struct test_data_stw *test_data = arg;  // shared state set up by the test function
+    PyGILState_STATE gil = PyGILState_Ensure();
+
+    if (_Py_atomic_add_ssize(&test_data->started, 1) == test_data->num_threads - 1) {
+        _PyEvent_Notify(&test_data->ready);  // wakes the waiter in the test function
+    }
+
+    // All threads: acquire critical section and hold it long enough to
+    // trigger TIME_TO_BE_FAIR_NS (1 ms), which causes direct handoff on unlock.
+    Py_BEGIN_CRITICAL_SECTION(test_data->obj);
+    pysleep(10);  // 10 ms = 10 x TIME_TO_BE_FAIR_NS
+    Py_END_CRITICAL_SECTION();
+
+    PyGILState_Release(gil);
+}
+
+static PyObject *
+test_critical_sections_stw(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // gh-144513: Test that critical sections don't deadlock during STW.
+    //
+    // The deadlock occurs when lock ownership is handed off (due to fairness
+    // after TIME_TO_BE_FAIR_NS) to a thread that has already suspended for
+    // stop-the-world. The STW requester then cannot acquire the lock.
+    //
+    // With the fix, the STW requester detects world_stopped and skips locking.
+
+    #define STW_NUM_THREADS 2
+    struct test_data_stw test_data = {
+        .obj = PyDict_New(),
+        .num_threads = STW_NUM_THREADS,
+    };
+    if (test_data.obj == NULL) {
+        return NULL;
+    }
+
+    PyThread_handle_t handles[STW_NUM_THREADS];
+    PyThread_ident_t idents[STW_NUM_THREADS];
+    for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
+        PyThread_start_joinable_thread(&thread_stw, &test_data,
+                                       &idents[i], &handles[i]);  // NOTE(review): return value is not checked
+    }
+
+    // Wait for threads to start, then let them compete for the lock
+    PyEvent_Wait(&test_data.ready);
+    pysleep(5);  // 5 ms: half of the 10 ms each worker sleeps inside its critical section
+
+    // Request stop-the-world and try to acquire the critical section.
+    // Without the fix, this may deadlock.
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    _PyEval_StopTheWorld(interp);
+
+    Py_BEGIN_CRITICAL_SECTION(test_data.obj);  // same object the workers lock
+    Py_END_CRITICAL_SECTION();  // empty body: only entering and leaving is exercised
+
+    _PyEval_StartTheWorld(interp);
+
+    for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
+        PyThread_join_thread(handles[i]);
+    }
+    #undef STW_NUM_THREADS
+    Py_DECREF(test_data.obj);
+    Py_RETURN_NONE;
+}
+
+#endif // Py_CAN_START_THREADS
+
 static PyMethodDef test_methods[] = {
     {"test_critical_sections", test_critical_sections, METH_NOARGS},
     {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
@@ -392,6 +475,7 @@ static PyMethodDef test_methods[] = {
 #ifdef Py_CAN_START_THREADS
     {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
     {"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
+    {"test_critical_sections_stw", test_critical_sections_stw, METH_NOARGS},
 #endif
     {NULL, NULL} /* sentinel */
 };
diff --git a/Python/critical_section.c b/Python/critical_section.c
index 2c2152f5de4716..98e23eda7cdd77 100644
--- a/Python/critical_section.c
+++ b/Python/critical_section.c
@@ -1,7 +1,8 @@
 #include "Python.h"
 
-#include "pycore_lock.h"
 #include "pycore_critical_section.h"
+#include "pycore_interp.h"
+#include "pycore_lock.h"
 
 #ifdef Py_GIL_DISABLED
 static_assert(_Alignof(PyCriticalSection) >= 4,
@@ -42,6 +43,15 @@ _PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMute
             }
         }
     }
+    // If the world is stopped, we don't need to acquire the lock because
+    // there are no other threads that could be accessing the object.
+    // Without this check, acquiring a critical section while the world is
+    // stopped could lead to a deadlock.
+    if (tstate->interp->stoptheworld.world_stopped) {
+        c->_cs_mutex = NULL;
+        c->_cs_prev = 0;
+        return;
+    }
     c->_cs_mutex = NULL;
     c->_cs_prev = (uintptr_t)tstate->critical_section;
     tstate->critical_section = (uintptr_t)c;
@@ -56,6 +66,12 @@ _PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMu
                               int is_m1_locked)
 {
 #ifdef Py_GIL_DISABLED
+    if (tstate->interp->stoptheworld.world_stopped) {
+        c->_cs_base._cs_mutex = NULL;
+        c->_cs_mutex2 = NULL;
+        c->_cs_base._cs_prev = 0;
+        return;
+    }
     c->_cs_base._cs_mutex = NULL;
     c->_cs_mutex2 = NULL;
     c->_cs_base._cs_prev = tstate->critical_section;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to