https://github.com/python/cpython/commit/441affc9e7f419ef0b68f734505fa2f79fe653c7
commit: 441affc9e7f419ef0b68f734505fa2f79fe653c7
branch: main
author: Sam Gross <colesb...@gmail.com>
committer: ericsnowcurrently <ericsnowcurren...@gmail.com>
date: 2024-01-23T11:08:23-07:00
summary:

gh-111964: Implement stop-the-world pauses (gh-112471)

The `--disable-gil` builds occasionally need to pause all but one thread.  Some
examples include:

* Cyclic garbage collection, where this is often called a "stop the world event"
* Before calling `fork()`, to ensure a consistent state for internal data 
structures
* During interpreter shutdown, to ensure that daemon threads aren't accessing 
Python objects

This adds the following functions to implement global and per-interpreter 
pauses:

* `_PyEval_StopTheWorldAll()` and `_PyEval_StartTheWorldAll()` (for the global 
runtime)
* `_PyEval_StopTheWorld()` and `_PyEval_StartTheWorld()` (per-interpreter)

(The function names may change.)

These functions are no-ops outside of the `--disable-gil` build.

files:
M Include/cpython/pystate.h
M Include/internal/pycore_ceval.h
M Include/internal/pycore_interp.h
M Include/internal/pycore_llist.h
M Include/internal/pycore_pystate.h
M Include/internal/pycore_runtime.h
M Include/internal/pycore_runtime_init.h
M Include/pymacro.h
M Python/ceval_gil.c
M Python/pystate.c

diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index 10913943c1140d..60b056bdcc2f1f 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -102,7 +102,7 @@ struct _ts {
 #endif
     int _whence;
 
-    /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_GC).
+    /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, 
_Py_THREAD_SUSPENDED).
        See Include/internal/pycore_pystate.h for more details. */
     int state;
 
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index a357bfa3a26064..a66af1389541dd 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -205,6 +205,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, 
_PyInterpreterFrame *frame)
 #define _PY_CALLS_TO_DO_BIT 2
 #define _PY_ASYNC_EXCEPTION_BIT 3
 #define _PY_GC_SCHEDULED_BIT 4
+#define _PY_EVAL_PLEASE_STOP_BIT 5
 
 /* Reserve a few bits for future use */
 #define _PY_EVAL_EVENTS_BITS 8
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 922c84543a1393..f953b8426e180a 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -41,6 +41,22 @@ struct _Py_long_state {
     int max_str_digits;
 };
 
+// Support for stop-the-world events. This exists in both the PyRuntime struct
+// for global pauses and in each PyInterpreterState for per-interpreter pauses.
+struct _stoptheworld_state {
+    PyMutex mutex;       // Serializes stop-the-world attempts.
+
+    // NOTE: The below fields are protected by HEAD_LOCK(runtime), not by the
+    // above mutex.
+    bool requested;      // Set when a pause is requested.
+    bool world_stopped;  // Set when the world is stopped.
+    bool is_global;      // Set when contained in PyRuntime struct.
+
+    PyEvent stop_event;  // Set when thread_countdown reaches zero.
+    Py_ssize_t thread_countdown;  // Number of threads that must pause.
+
+    PyThreadState *requester; // Thread that requested the pause (may be NULL).
+};
 
 /* cross-interpreter data registry */
 
@@ -166,6 +182,7 @@ struct _is {
 
     struct _warnings_runtime_state warnings;
     struct atexit_state atexit;
+    struct _stoptheworld_state stoptheworld;
 
 #if defined(Py_GIL_DISABLED)
     struct _mimalloc_interp_state mimalloc;
diff --git a/Include/internal/pycore_llist.h b/Include/internal/pycore_llist.h
index 5fd261da05fa5d..f629902fda9ff1 100644
--- a/Include/internal/pycore_llist.h
+++ b/Include/internal/pycore_llist.h
@@ -37,8 +37,7 @@ struct llist_node {
 };
 
 // Get the struct containing a node.
-#define llist_data(node, type, member) \
-    (type*)((char*)node - offsetof(type, member))
+#define llist_data(node, type, member) (_Py_CONTAINER_OF(node, type, member))
 
 // Iterate over a list.
 #define llist_for_each(node, head) \
diff --git a/Include/internal/pycore_pystate.h 
b/Include/internal/pycore_pystate.h
index 348c5c634284b0..289ef28f0dd9a9 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -21,23 +21,27 @@ extern "C" {
 // interpreter at the same time. Only the "bound" thread may perform the
 // transitions between "attached" and "detached" on its own PyThreadState.
 //
-// The "gc" state is used to implement stop-the-world pauses, such as for
-// cyclic garbage collection. It is only used in `--disable-gil` builds. It is
-// similar to the "detached" state, but only the thread performing a
-// stop-the-world pause may transition threads between the "detached" and "gc"
-// states. A thread trying to "attach" from the "gc" state will block until
-// it is transitioned back to "detached" when the stop-the-world pause is
-// complete.
+// The "suspended" state is used to implement stop-the-world pauses, such as
+// for cyclic garbage collection. It is only used in `--disable-gil` builds.
+// The "suspended" state is similar to the "detached" state in that in both
+// states the thread is not allowed to call most Python APIs. However, unlike
+// the "detached" state, a thread may not transition itself out from the
+// "suspended" state. Only the thread performing a stop-the-world pause may
+// transition a thread from the "suspended" state back to the "detached" state.
 //
 // State transition diagram:
 //
 //            (bound thread)        (stop-the-world thread)
-// [attached]       <->       [detached]       <->       [gc]
+// [attached]       <->       [detached]       <->       [suspended]
+//   |                                                        ^
+//   +---------------------------->---------------------------+
+//                          (bound thread)
 //
-// See `_PyThreadState_Attach()` and `_PyThreadState_Detach()`.
+// The (bound thread) and (stop-the-world thread) labels indicate which thread
+// is allowed to perform the transition.
 #define _Py_THREAD_DETACHED     0
 #define _Py_THREAD_ATTACHED     1
-#define _Py_THREAD_GC           2
+#define _Py_THREAD_SUSPENDED    2
 
 
 /* Check if the current thread is the main thread.
@@ -140,13 +144,36 @@ _PyThreadState_GET(void)
 //
 // High-level code should generally call PyEval_RestoreThread() instead, which
 // calls this function.
-void _PyThreadState_Attach(PyThreadState *tstate);
+extern void _PyThreadState_Attach(PyThreadState *tstate);
 
 // Detaches the current thread from the interpreter.
 //
 // High-level code should generally call PyEval_SaveThread() instead, which
 // calls this function.
-void _PyThreadState_Detach(PyThreadState *tstate);
+extern void _PyThreadState_Detach(PyThreadState *tstate);
+
+// Detaches the current thread to the "suspended" state if a stop-the-world
+// pause is in progress.
+//
+// If there is no stop-the-world pause in progress, then the thread switches
+// to the "detached" state.
+extern void _PyThreadState_Suspend(PyThreadState *tstate);
+
+// Perform a stop-the-world pause for all threads in the all interpreters.
+//
+// Threads in the "attached" state are paused and transitioned to the "GC"
+// state. Threads in the "detached" state switch to the "GC" state, preventing
+// them from reattaching until the stop-the-world pause is complete.
+//
+// NOTE: This is a no-op outside of Py_GIL_DISABLED builds.
+extern void _PyEval_StopTheWorldAll(_PyRuntimeState *runtime);
+extern void _PyEval_StartTheWorldAll(_PyRuntimeState *runtime);
+
+// Perform a stop-the-world pause for threads in the specified interpreter.
+//
+// NOTE: This is a no-op outside of Py_GIL_DISABLED builds.
+extern void _PyEval_StopTheWorld(PyInterpreterState *interp);
+extern void _PyEval_StartTheWorld(PyInterpreterState *interp);
 
 
 static inline void
diff --git a/Include/internal/pycore_runtime.h 
b/Include/internal/pycore_runtime.h
index e3348296ea61b7..02ab22b967b38f 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -227,6 +227,13 @@ typedef struct pyruntimestate {
     struct _faulthandler_runtime_state faulthandler;
     struct _tracemalloc_runtime_state tracemalloc;
 
+    // The rwmutex is used to prevent overlapping global and per-interpreter
+    // stop-the-world events. Global stop-the-world events lock the mutex
+    // exclusively (as a "writer"), while per-interpreter stop-the-world events
+    // lock it non-exclusively (as "readers").
+    _PyRWMutex stoptheworld_mutex;
+    struct _stoptheworld_state stoptheworld;
+
     PyPreConfig preconfig;
 
     // Audit values must be preserved when Py_Initialize()/Py_Finalize()
diff --git a/Include/internal/pycore_runtime_init.h 
b/Include/internal/pycore_runtime_init.h
index 5f47d60de37825..b4806ab09fd145 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -116,6 +116,9 @@ extern PyTypeObject _PyExc_MemoryError;
         }, \
         .faulthandler = _faulthandler_runtime_state_INIT, \
         .tracemalloc = _tracemalloc_runtime_state_INIT, \
+        .stoptheworld = { \
+            .is_global = 1, \
+        }, \
         .float_state = { \
             .float_format = _py_float_format_unknown, \
             .double_format = _py_float_format_unknown, \
diff --git a/Include/pymacro.h b/Include/pymacro.h
index 9d264fe6eea1d4..cd6fc4eba9c2ed 100644
--- a/Include/pymacro.h
+++ b/Include/pymacro.h
@@ -160,6 +160,9 @@
     Py_FatalError("Unreachable C code path reached")
 #endif
 
+#define _Py_CONTAINER_OF(ptr, type, member) \
+    (type*)((char*)ptr - offsetof(type, member))
+
 // Prevent using an expression as a l-value.
 // For example, "int x; _Py_RVALUE(x) = 1;" fails with a compiler error.
 #define _Py_RVALUE(EXPR) ((void)0, (EXPR))
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index d70abbc27606b4..f3b169241535f3 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -949,6 +949,15 @@ _Py_HandlePending(PyThreadState *tstate)
 {
     PyInterpreterState *interp = tstate->interp;
 
+    /* Stop-the-world */
+    if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_PLEASE_STOP_BIT)) {
+        _Py_set_eval_breaker_bit(interp, _PY_EVAL_PLEASE_STOP_BIT, 0);
+        _PyThreadState_Suspend(tstate);
+
+        /* The attach blocks until the stop-the-world event is complete. */
+        _PyThreadState_Attach(tstate);
+    }
+
     /* Pending signals */
     if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) {
         if (handle_signals(tstate) != 0) {
diff --git a/Python/pystate.c b/Python/pystate.c
index 999976283da675..23ddc781434ac8 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1336,6 +1336,11 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     tstate->datastack_limit = NULL;
     tstate->what_event = -1;
 
+    if (interp->stoptheworld.requested || _PyRuntime.stoptheworld.requested) {
+        // Start in the suspended state if there is an ongoing stop-the-world.
+        tstate->state = _Py_THREAD_SUSPENDED;
+    }
+
     tstate->_status.initialized = 1;
 }
 
@@ -1562,6 +1567,9 @@ PyThreadState_Clear(PyThreadState *tstate)
     // XXX Do it as early in the function as possible.
 }
 
+static void
+decrement_stoptheworld_countdown(struct _stoptheworld_state *stw);
+
 /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */
 static void
 tstate_delete_common(PyThreadState *tstate)
@@ -1585,6 +1593,16 @@ tstate_delete_common(PyThreadState *tstate)
     if (tstate->next) {
         tstate->next->prev = tstate->prev;
     }
+    if (tstate->state != _Py_THREAD_SUSPENDED) {
+        // Any ongoing stop-the-world request should not wait for us because
+        // our thread is getting deleted.
+        if (interp->stoptheworld.requested) {
+            decrement_stoptheworld_countdown(&interp->stoptheworld);
+        }
+        if (runtime->stoptheworld.requested) {
+            decrement_stoptheworld_countdown(&runtime->stoptheworld);
+        }
+    }
     HEAD_UNLOCK(runtime);
 
     // XXX Unbind in PyThreadState_Clear(), or earlier
@@ -1790,13 +1808,9 @@ tstate_try_attach(PyThreadState *tstate)
 {
 #ifdef Py_GIL_DISABLED
     int expected = _Py_THREAD_DETACHED;
-    if (_Py_atomic_compare_exchange_int(
-            &tstate->state,
-            &expected,
-            _Py_THREAD_ATTACHED)) {
-        return 1;
-    }
-    return 0;
+    return _Py_atomic_compare_exchange_int(&tstate->state,
+                                           &expected,
+                                           _Py_THREAD_ATTACHED);
 #else
     assert(tstate->state == _Py_THREAD_DETACHED);
     tstate->state = _Py_THREAD_ATTACHED;
@@ -1815,6 +1829,20 @@ tstate_set_detached(PyThreadState *tstate)
 #endif
 }
 
+static void
+tstate_wait_attach(PyThreadState *tstate)
+{
+    do {
+        int expected = _Py_THREAD_SUSPENDED;
+
+        // Wait until we're switched out of SUSPENDED to DETACHED.
+        _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state),
+                           /*timeout=*/-1, NULL, /*detach=*/0);
+
+        // Once we're back in DETACHED we can re-attach
+    } while (!tstate_try_attach(tstate));
+}
+
 void
 _PyThreadState_Attach(PyThreadState *tstate)
 {
@@ -1836,10 +1864,7 @@ _PyThreadState_Attach(PyThreadState *tstate)
     tstate_activate(tstate);
 
     if (!tstate_try_attach(tstate)) {
-        // TODO: Once stop-the-world GC is implemented for --disable-gil builds
-        // this will need to wait until the GC completes. For now, this case
-        // should never happen.
-        Py_FatalError("thread attach failed");
+        tstate_wait_attach(tstate);
     }
 
     // Resume previous critical section. This acquires the lock(s) from the
@@ -1853,8 +1878,8 @@ _PyThreadState_Attach(PyThreadState *tstate)
 #endif
 }
 
-void
-_PyThreadState_Detach(PyThreadState *tstate)
+static void
+detach_thread(PyThreadState *tstate, int detached_state)
 {
     // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate));
     assert(tstate->state == _Py_THREAD_ATTACHED);
@@ -1862,12 +1887,228 @@ _PyThreadState_Detach(PyThreadState *tstate)
     if (tstate->critical_section != 0) {
         _PyCriticalSection_SuspendAll(tstate);
     }
-    tstate_set_detached(tstate);
     tstate_deactivate(tstate);
+    tstate_set_detached(tstate);
     current_fast_clear(&_PyRuntime);
     _PyEval_ReleaseLock(tstate->interp, tstate);
 }
 
+void
+_PyThreadState_Detach(PyThreadState *tstate)
+{
+    detach_thread(tstate, _Py_THREAD_DETACHED);
+}
+
+void
+_PyThreadState_Suspend(PyThreadState *tstate)
+{
+    _PyRuntimeState *runtime = &_PyRuntime;
+
+    assert(tstate->state == _Py_THREAD_ATTACHED);
+
+    struct _stoptheworld_state *stw = NULL;
+    HEAD_LOCK(runtime);
+    if (runtime->stoptheworld.requested) {
+        stw = &runtime->stoptheworld;
+    }
+    else if (tstate->interp->stoptheworld.requested) {
+        stw = &tstate->interp->stoptheworld;
+    }
+    HEAD_UNLOCK(runtime);
+
+    if (stw == NULL) {
+        // Switch directly to "detached" if there is no active stop-the-world
+        // request.
+        detach_thread(tstate, _Py_THREAD_DETACHED);
+        return;
+    }
+
+    // Switch to "suspended" state.
+    detach_thread(tstate, _Py_THREAD_SUSPENDED);
+
+    // Decrease the count of remaining threads needing to park.
+    HEAD_LOCK(runtime);
+    decrement_stoptheworld_countdown(stw);
+    HEAD_UNLOCK(runtime);
+}
+
+// Decrease stop-the-world counter of remaining number of threads that need to
+// pause. If we are the final thread to pause, notify the requesting thread.
+static void
+decrement_stoptheworld_countdown(struct _stoptheworld_state *stw)
+{
+    assert(stw->thread_countdown > 0);
+    if (--stw->thread_countdown == 0) {
+        _PyEvent_Notify(&stw->stop_event);
+    }
+}
+
+#ifdef Py_GIL_DISABLED
+// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events,
+// we start with the first interpreter and then iterate over all interpreters.
+// For per-interpreter stop-the-world events, we only operate on the one
+// interpreter.
+static PyInterpreterState *
+interp_for_stop_the_world(struct _stoptheworld_state *stw)
+{
+    return (stw->is_global
+        ? PyInterpreterState_Head()
+        : _Py_CONTAINER_OF(stw, PyInterpreterState, stoptheworld));
+}
+
+// Loops over threads for a stop-the-world event.
+// For global: all threads in all interpreters
+// For per-interpreter: all threads in the interpreter
+#define _Py_FOR_EACH_THREAD(stw, i, t)                                      \
+    for (i = interp_for_stop_the_world((stw));                              \
+            i != NULL; i = ((stw->is_global) ? i->next : NULL))             \
+        for (t = i->threads.head; t; t = t->next)
+
+
+// Try to transition threads atomically from the "detached" state to the
+// "gc stopped" state. Returns true if all threads are in the "gc stopped"
+static bool
+park_detached_threads(struct _stoptheworld_state *stw)
+{
+    int num_parked = 0;
+    PyInterpreterState *i;
+    PyThreadState *t;
+    _Py_FOR_EACH_THREAD(stw, i, t) {
+        int state = _Py_atomic_load_int_relaxed(&t->state);
+        if (state == _Py_THREAD_DETACHED) {
+            // Atomically transition to "suspended" if in "detached" state.
+            if (_Py_atomic_compare_exchange_int(&t->state,
+                                                &state, _Py_THREAD_SUSPENDED)) 
{
+                num_parked++;
+            }
+        }
+        else if (state == _Py_THREAD_ATTACHED && t != stw->requester) {
+            // TODO: set this per-thread, rather than per-interpreter.
+            _Py_set_eval_breaker_bit(t->interp, _PY_EVAL_PLEASE_STOP_BIT, 1);
+        }
+    }
+    stw->thread_countdown -= num_parked;
+    assert(stw->thread_countdown >= 0);
+    return num_parked > 0 && stw->thread_countdown == 0;
+}
+
+static void
+stop_the_world(struct _stoptheworld_state *stw)
+{
+    _PyRuntimeState *runtime = &_PyRuntime;
+
+    PyMutex_Lock(&stw->mutex);
+    if (stw->is_global) {
+        _PyRWMutex_Lock(&runtime->stoptheworld_mutex);
+    }
+    else {
+        _PyRWMutex_RLock(&runtime->stoptheworld_mutex);
+    }
+
+    HEAD_LOCK(runtime);
+    stw->requested = 1;
+    stw->thread_countdown = 0;
+    stw->stop_event = (PyEvent){0};  // zero-initialize (unset)
+    stw->requester = _PyThreadState_GET();  // may be NULL
+
+    PyInterpreterState *i;
+    PyThreadState *t;
+    _Py_FOR_EACH_THREAD(stw, i, t) {
+        if (t != stw->requester) {
+            // Count all the other threads (we don't wait on ourself).
+            stw->thread_countdown++;
+        }
+    }
+
+    if (stw->thread_countdown == 0) {
+        HEAD_UNLOCK(runtime);
+        stw->world_stopped = 1;
+        return;
+    }
+
+    for (;;) {
+        // Switch threads that are detached to the GC stopped state
+        bool stopped_all_threads = park_detached_threads(stw);
+        HEAD_UNLOCK(runtime);
+
+        if (stopped_all_threads) {
+            break;
+        }
+
+        _PyTime_t wait_ns = 1000*1000;  // 1ms (arbitrary, may need tuning)
+        if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) {
+            assert(stw->thread_countdown == 0);
+            break;
+        }
+
+        HEAD_LOCK(runtime);
+    }
+    stw->world_stopped = 1;
+}
+
+static void
+start_the_world(struct _stoptheworld_state *stw)
+{
+    _PyRuntimeState *runtime = &_PyRuntime;
+    assert(PyMutex_IsLocked(&stw->mutex));
+
+    HEAD_LOCK(runtime);
+    stw->requested = 0;
+    stw->world_stopped = 0;
+    stw->requester = NULL;
+    // Switch threads back to the detached state.
+    PyInterpreterState *i;
+    PyThreadState *t;
+    _Py_FOR_EACH_THREAD(stw, i, t) {
+        if (t != stw->requester) {
+            assert(t->state == _Py_THREAD_SUSPENDED);
+            _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED);
+            _PyParkingLot_UnparkAll(&t->state);
+        }
+    }
+    HEAD_UNLOCK(runtime);
+    if (stw->is_global) {
+        _PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
+    }
+    else {
+        _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
+    }
+    PyMutex_Unlock(&stw->mutex);
+}
+#endif  // Py_GIL_DISABLED
+
+void
+_PyEval_StopTheWorldAll(_PyRuntimeState *runtime)
+{
+#ifdef Py_GIL_DISABLED
+    stop_the_world(&runtime->stoptheworld);
+#endif
+}
+
+void
+_PyEval_StartTheWorldAll(_PyRuntimeState *runtime)
+{
+#ifdef Py_GIL_DISABLED
+    start_the_world(&runtime->stoptheworld);
+#endif
+}
+
+void
+_PyEval_StopTheWorld(PyInterpreterState *interp)
+{
+#ifdef Py_GIL_DISABLED
+    stop_the_world(&interp->stoptheworld);
+#endif
+}
+
+void
+_PyEval_StartTheWorld(PyInterpreterState *interp)
+{
+#ifdef Py_GIL_DISABLED
+    start_the_world(&interp->stoptheworld);
+#endif
+}
+
 //----------
 // other API
 //----------

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to