https://github.com/python/cpython/commit/080f444a58dae1bb76a87ce746a09fd499792cfb
commit: 080f444a58dae1bb76a87ce746a09fd499792cfb
branch: main
author: Neil Schemenauer <[email protected]>
committer: nascheme <[email protected]>
date: 2025-01-15T11:27:28-08:00
summary:
gh-128807: Add marking phase for free-threaded cyclic GC (gh-128808)
files:
A
Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
M Include/internal/pycore_gc.h
M Python/gc_free_threading.c
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 4ff34bf8ead7d0..b1806df2706097 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -45,12 +45,13 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
* the per-object lock.
*/
#ifdef Py_GIL_DISABLED
-# define _PyGC_BITS_TRACKED (1) // Tracked by the GC
-# define _PyGC_BITS_FINALIZED (2) // tp_finalize was called
-# define _PyGC_BITS_UNREACHABLE (4)
-# define _PyGC_BITS_FROZEN (8)
-# define _PyGC_BITS_SHARED (16)
-# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting
+# define _PyGC_BITS_TRACKED (1<<0) // Tracked by the GC
+# define _PyGC_BITS_FINALIZED (1<<1) // tp_finalize was called
+# define _PyGC_BITS_UNREACHABLE (1<<2)
+# define _PyGC_BITS_FROZEN (1<<3)
+# define _PyGC_BITS_SHARED (1<<4)
+# define _PyGC_BITS_ALIVE (1<<5) // Reachable from a known root.
+# define _PyGC_BITS_DEFERRED (1<<6) // Use deferred reference
counting
#endif
#ifdef Py_GIL_DISABLED
@@ -330,6 +331,9 @@ struct _gc_runtime_state {
collections, and are awaiting to undergo a full collection for
the first time. */
Py_ssize_t long_lived_pending;
+
+ /* True if gc.freeze() has been used. */
+ int freeze_active;
#endif
};
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
new file mode 100644
index 00000000000000..34952e9abb66e5
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
@@ -0,0 +1,6 @@
+Add a marking phase to the free-threaded GC. This is similar to what was
+done in GH-126491. Since the free-threaded GC does not have generations and
+is not incremental, the marking phase looks for all objects reachable from
+known roots. The roots are objects known to not be garbage, like the module
+dictionary for :mod:`sys`. For most programs, this marking phase should
+make the GC a bit faster since typically less work is done per object.
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index f7f44407494e51..d1023d9351086f 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -17,6 +17,17 @@
#include "pydtrace.h"
#include "pycore_uniqueid.h" // _PyObject_MergeThreadLocalRefcounts()
+
+// enable the "mark alive" pass of GC
+#define GC_ENABLE_MARK_ALIVE 1
+
+// include additional roots in "mark alive" pass
+#define GC_MARK_ALIVE_EXTRA_ROOTS 1
+
+// include Python stacks as set of known roots
+#define GC_MARK_ALIVE_STACKS 1
+
+
#ifdef Py_GIL_DISABLED
typedef struct _gc_runtime_state GCState;
@@ -113,28 +124,66 @@ worklist_remove(struct worklist_iter *iter)
iter->next = iter->ptr;
}
+static inline int
+gc_has_bit(PyObject *op, uint8_t bit)
+{
+ return (op->ob_gc_bits & bit) != 0;
+}
+
+static inline void
+gc_set_bit(PyObject *op, uint8_t bit)
+{
+ op->ob_gc_bits |= bit;
+}
+
+static inline void
+gc_clear_bit(PyObject *op, uint8_t bit)
+{
+ op->ob_gc_bits &= ~bit;
+}
+
static inline int
gc_is_frozen(PyObject *op)
{
- return (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0;
+ return gc_has_bit(op, _PyGC_BITS_FROZEN);
}
static inline int
gc_is_unreachable(PyObject *op)
{
- return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
+ return gc_has_bit(op, _PyGC_BITS_UNREACHABLE);
}
-static void
+static inline void
gc_set_unreachable(PyObject *op)
{
- op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
+ gc_set_bit(op, _PyGC_BITS_UNREACHABLE);
}
-static void
+static inline void
gc_clear_unreachable(PyObject *op)
{
- op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
+ gc_clear_bit(op, _PyGC_BITS_UNREACHABLE);
+}
+
+static inline int
+gc_is_alive(PyObject *op)
+{
+ return gc_has_bit(op, _PyGC_BITS_ALIVE);
+}
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static inline void
+gc_set_alive(PyObject *op)
+{
+ gc_set_bit(op, _PyGC_BITS_ALIVE);
+}
+#endif
+
+static inline void
+gc_clear_alive(PyObject *op)
+{
+ gc_clear_bit(op, _PyGC_BITS_ALIVE);
}
// Initialize the `ob_tid` field to zero if the object is not already
@@ -143,6 +192,7 @@ static void
gc_maybe_init_refs(PyObject *op)
{
if (!gc_is_unreachable(op)) {
+ assert(!gc_is_alive(op));
gc_set_unreachable(op);
op->ob_tid = 0;
}
@@ -264,9 +314,13 @@ static void
gc_restore_refs(PyObject *op)
{
if (gc_is_unreachable(op)) {
+ assert(!gc_is_alive(op));
gc_restore_tid(op);
gc_clear_unreachable(op);
}
+ else {
+ gc_clear_alive(op);
+ }
}
// Given a mimalloc memory block return the PyObject stored in it or NULL if
@@ -392,6 +446,119 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
_Py_FOR_EACH_TSTATE_END(interp);
}
+// Untrack objects that can never create reference cycles.
+// Return true if the object was untracked.
+static bool
+gc_maybe_untrack(PyObject *op)
+{
+ // Currently we only check for tuples containing only non-GC objects. In
+ // theory we could check other immutable objects that contain references
+ // to non-GC objects.
+ if (PyTuple_CheckExact(op)) {
+ _PyTuple_MaybeUntrack(op);
+ if (!_PyObject_GC_IS_TRACKED(op)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+mark_alive_stack_push(PyObject *op, _PyObjectStack *stack)
+{
+ if (op == NULL) {
+ return 0;
+ }
+ if (!_PyObject_GC_IS_TRACKED(op)) {
+ return 0;
+ }
+ if (gc_is_alive(op)) {
+ return 0; // already visited this object
+ }
+ if (gc_maybe_untrack(op)) {
+ return 0; // was untracked, don't visit it
+ }
+
+ // Need to call tp_traverse on this object. Add to stack and mark it
+ // alive so we don't traverse it a second time.
+ gc_set_alive(op);
+ if (_PyObjectStack_Push(stack, op) < 0) {
+ return -1;
+ }
+ return 0;
+}
+
+static bool
+gc_clear_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+ void *block, size_t block_size, void *args)
+{
+ PyObject *op = op_from_block(block, args, false);
+ if (op == NULL) {
+ return true;
+ }
+ if (gc_is_alive(op)) {
+ gc_clear_alive(op);
+ }
+ return true;
+}
+
+static void
+gc_abort_mark_alive(PyInterpreterState *interp,
+ struct collection_state *state,
+ _PyObjectStack *stack)
+{
+ // We failed to allocate memory for "stack" while doing the "mark
+ // alive" phase. In that case, free the object stack and make sure
+ // that no objects have the alive bit set.
+ _PyObjectStack_Clear(stack);
+ gc_visit_heaps(interp, &gc_clear_alive_bits, &state->base);
+}
+
+#ifdef GC_MARK_ALIVE_STACKS
+static int
+gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref)
+{
+ // Note: we MUST check that it is deferred before checking the rest.
+ // Otherwise we might read into invalid memory due to non-deferred
references
+ // being dead already.
+ if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
+ PyObject *op = PyStackRef_AsPyObjectBorrow(stackref);
+ if (mark_alive_stack_push(op, stack) < 0) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+gc_visit_thread_stacks_mark_alive(PyInterpreterState *interp, _PyObjectStack
*stack)
+{
+ _Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
+ for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f =
f->previous) {
+ PyObject *executable =
PyStackRef_AsPyObjectBorrow(f->f_executable);
+ if (executable == NULL || !PyCode_Check(executable)) {
+ continue;
+ }
+
+ PyCodeObject *co = (PyCodeObject *)executable;
+ int max_stack = co->co_nlocalsplus + co->co_stacksize;
+ if (gc_visit_stackref_mark_alive(stack, f->f_executable) < 0) {
+ return -1;
+ }
+ for (int i = 0; i < max_stack; i++) {
+ if (gc_visit_stackref_mark_alive(stack, f->localsplus[i]) < 0)
{
+ return -1;
+ }
+ }
+ }
+ }
+ _Py_FOR_EACH_TSTATE_END(interp);
+ return 0;
+}
+#endif // GC_MARK_ALIVE_STACKS
+#endif // GC_ENABLE_MARK_ALIVE
+
static void
queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
{
@@ -460,7 +627,8 @@ visit_decref(PyObject *op, void *arg)
{
if (_PyObject_GC_IS_TRACKED(op)
&& !_Py_IsImmortal(op)
- && !gc_is_frozen(op))
+ && !gc_is_frozen(op)
+ && !gc_is_alive(op))
{
// If update_refs hasn't reached this object yet, mark it
// as (tentatively) unreachable and initialize ob_tid to zero.
@@ -482,6 +650,10 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t
*area,
return true;
}
+ if (gc_is_alive(op)) {
+ return true;
+ }
+
// Exclude immortal objects from garbage collection
if (_Py_IsImmortal(op)) {
op->ob_tid = 0;
@@ -497,14 +669,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t
*area,
_PyObject_ASSERT(op, refcount >= 0);
if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
- // Untrack tuples and dicts as necessary in this pass, but not objects
- // with zero refcount, which we will want to collect.
- if (PyTuple_CheckExact(op)) {
- _PyTuple_MaybeUntrack(op);
- if (!_PyObject_GC_IS_TRACKED(op)) {
- gc_restore_refs(op);
- return true;
- }
+ if (gc_maybe_untrack(op)) {
+ gc_restore_refs(op);
+ return true;
}
}
@@ -553,6 +720,21 @@ mark_reachable(PyObject *op)
}
#ifdef GC_DEBUG
+static bool
+validate_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+ void *block, size_t block_size, void *args)
+{
+ PyObject *op = op_from_block(block, args, false);
+ if (op == NULL) {
+ return true;
+ }
+
+ _PyObject_ASSERT_WITH_MSG(op, !gc_is_alive(op),
+ "object should not be marked as alive yet");
+
+ return true;
+}
+
static bool
validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
@@ -586,6 +768,11 @@ validate_gc_objects(const mi_heap_t *heap, const
mi_heap_area_t *area,
return true;
}
+ if (gc_is_alive(op)) {
+ _PyObject_ASSERT(op, !gc_is_unreachable(op));
+ return true;
+ }
+
_PyObject_ASSERT(op, gc_is_unreachable(op));
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@@ -602,6 +789,10 @@ mark_heap_visitor(const mi_heap_t *heap, const
mi_heap_area_t *area,
return true;
}
+ if (gc_is_alive(op)) {
+ return true;
+ }
+
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@@ -630,6 +821,7 @@ restore_refs(const mi_heap_t *heap, const mi_heap_area_t
*area,
}
gc_restore_tid(op);
gc_clear_unreachable(op);
+ gc_clear_alive(op);
return true;
}
@@ -679,6 +871,7 @@ scan_heap_visitor(const mi_heap_t *heap, const
mi_heap_area_t *area,
// object is reachable, restore `ob_tid`; we're done with these objects
gc_restore_tid(op);
+ gc_clear_alive(op);
state->long_lived_total++;
return true;
}
@@ -686,6 +879,89 @@ scan_heap_visitor(const mi_heap_t *heap, const
mi_heap_area_t *area,
static int
move_legacy_finalizer_reachable(struct collection_state *state);
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+propagate_alive_bits(_PyObjectStack *stack)
+{
+ for (;;) {
+ PyObject *op = _PyObjectStack_Pop(stack);
+ if (op == NULL) {
+ break;
+ }
+ assert(_PyObject_GC_IS_TRACKED(op));
+ assert(gc_is_alive(op));
+ traverseproc traverse = Py_TYPE(op)->tp_traverse;
+ if (traverse(op, (visitproc)&mark_alive_stack_push, stack) < 0) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+// Using tp_traverse, mark everything reachable from known root objects
+// (which must be non-garbage) as alive (_PyGC_BITS_ALIVE is set). In
+// most programs, this marks nearly all objects that are not actually
+// unreachable.
+//
+// Actually alive objects can be missed in this pass if they are alive
+// due to being referenced from an unknown root (e.g. an extension
+// module global), some tp_traverse methods are either missing or not
+// accurate, or objects that have been untracked. Objects that are only
+// reachable from the aforementioned are also missed.
+//
+// If gc.freeze() is used, this pass is disabled since it is unlikely to
+// help much. The next stages of cyclic GC will ignore objects with the
+// alive bit set.
+//
+// Returns -1 on failure (out of memory).
+static int
+mark_alive_from_roots(PyInterpreterState *interp,
+ struct collection_state *state)
+{
+#ifdef GC_DEBUG
+ // Check that all objects don't have alive bit set
+ gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+ _PyObjectStack stack = { NULL };
+
+ #define STACK_PUSH(op) \
+ if (mark_alive_stack_push(op, &stack) < 0) { \
+ gc_abort_mark_alive(interp, state, &stack); \
+ return -1; \
+ }
+ STACK_PUSH(interp->sysdict);
+#ifdef GC_MARK_ALIVE_EXTRA_ROOTS
+ STACK_PUSH(interp->builtins);
+ STACK_PUSH(interp->dict);
+ struct types_state *types = &interp->types;
+ for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
+ STACK_PUSH(types->builtins.initialized[i].tp_dict);
+ STACK_PUSH(types->builtins.initialized[i].tp_subclasses);
+ }
+ for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
+ STACK_PUSH(types->for_extensions.initialized[i].tp_dict);
+ STACK_PUSH(types->for_extensions.initialized[i].tp_subclasses);
+ }
+#endif
+#ifdef GC_MARK_ALIVE_STACKS
+ if (gc_visit_thread_stacks_mark_alive(interp, &stack) < 0) {
+ gc_abort_mark_alive(interp, state, &stack);
+ return -1;
+ }
+#endif
+ #undef STACK_PUSH
+
+ // Use tp_traverse to find everything reachable from roots.
+ if (propagate_alive_bits(&stack) < 0) {
+ gc_abort_mark_alive(interp, state, &stack);
+ return -1;
+ }
+
+ return 0;
+}
+#endif // GC_ENABLE_MARK_ALIVE
+
+
static int
deduce_unreachable_heap(PyInterpreterState *interp,
struct collection_state *state)
@@ -1245,6 +1521,25 @@ gc_collect_internal(PyInterpreterState *interp, struct
collection_state *state,
process_delayed_frees(interp, state);
+ #ifdef GC_ENABLE_MARK_ALIVE
+ // If gc.freeze() was used, it seems likely that doing this "mark alive"
+ // pass will not be a performance win. Typically the majority of alive
+ // objects will be marked as frozen and will be skipped anyhow, without
+ // doing this extra work. Doing this pass also defeats one of the
+ // purposes of using freeze: avoiding writes to objects that are frozen.
+ // So, we just skip this if gc.freeze() was used.
+ if (!state->gcstate->freeze_active) {
+ // Mark objects reachable from known roots as "alive". These will
+ // be ignored for rest of the GC pass.
+ int err = mark_alive_from_roots(interp, state);
+ if (err < 0) {
+ _PyEval_StartTheWorld(interp);
+ PyErr_NoMemory();
+ return;
+ }
+ }
+ #endif
+
// Find unreachable objects
int err = deduce_unreachable_heap(interp, state);
if (err < 0) {
@@ -1253,6 +1548,11 @@ gc_collect_internal(PyInterpreterState *interp, struct
collection_state *state,
return;
}
+#ifdef GC_DEBUG
+ // At this point, no object should have the alive bit set
+ gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+
// Print debugging information.
if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
PyObject *op;
@@ -1564,6 +1864,8 @@ _PyGC_Freeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
+ GCState *gcstate = get_gc_state();
+ gcstate->freeze_active = true;
gc_visit_heaps(interp, &visit_freeze, &args);
_PyEval_StartTheWorld(interp);
}
@@ -1574,7 +1876,7 @@ visit_unfreeze(const mi_heap_t *heap, const
mi_heap_area_t *area,
{
PyObject *op = op_from_block(block, args, true);
if (op != NULL) {
- op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
+ gc_clear_bit(op, _PyGC_BITS_FROZEN);
}
return true;
}
@@ -1584,6 +1886,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
+ GCState *gcstate = get_gc_state();
+ gcstate->freeze_active = false;
gc_visit_heaps(interp, &visit_unfreeze, &args);
_PyEval_StartTheWorld(interp);
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]