https://github.com/python/cpython/commit/080f444a58dae1bb76a87ce746a09fd499792cfb
commit: 080f444a58dae1bb76a87ce746a09fd499792cfb
branch: main
author: Neil Schemenauer <nas-git...@arctrix.com>
committer: nascheme <nas-git...@arctrix.com>
date: 2025-01-15T11:27:28-08:00
summary:

gh-128807: Add marking phase for free-threaded cyclic GC (gh-128808)

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
M Include/internal/pycore_gc.h
M Python/gc_free_threading.c

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 4ff34bf8ead7d0..b1806df2706097 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -45,12 +45,13 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
  * the per-object lock.
  */
 #ifdef Py_GIL_DISABLED
-#  define _PyGC_BITS_TRACKED        (1)     // Tracked by the GC
-#  define _PyGC_BITS_FINALIZED      (2)     // tp_finalize was called
-#  define _PyGC_BITS_UNREACHABLE    (4)
-#  define _PyGC_BITS_FROZEN         (8)
-#  define _PyGC_BITS_SHARED         (16)
-#  define _PyGC_BITS_DEFERRED       (64)    // Use deferred reference counting
+#  define _PyGC_BITS_TRACKED        (1<<0)     // Tracked by the GC
+#  define _PyGC_BITS_FINALIZED      (1<<1)     // tp_finalize was called
+#  define _PyGC_BITS_UNREACHABLE    (1<<2)
+#  define _PyGC_BITS_FROZEN         (1<<3)
+#  define _PyGC_BITS_SHARED         (1<<4)
+#  define _PyGC_BITS_ALIVE          (1<<5)    // Reachable from a known root.
+#  define _PyGC_BITS_DEFERRED       (1<<6)    // Use deferred reference counting
 #endif
 
 #ifdef Py_GIL_DISABLED
@@ -330,6 +331,9 @@ struct _gc_runtime_state {
        collections, and are awaiting to undergo a full collection for
        the first time. */
     Py_ssize_t long_lived_pending;
+
+    /* True if gc.freeze() has been used. */
+    int freeze_active;
 #endif
 };
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
new file mode 100644
index 00000000000000..34952e9abb66e5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-13-17-03-49.gh-issue-128807.BGGBxD.rst
@@ -0,0 +1,6 @@
+Add a marking phase to the free-threaded GC.  This is similar to what was
+done in GH-126491.  Since the free-threaded GC does not have generations and
+is not incremental, the marking phase looks for all objects reachable from
+known roots.  The roots are objects known to not be garbage, like the module
+dictionary for :mod:`sys`.  For most programs, this marking phase should
+make the GC a bit faster since typically less work is done per object.
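
In outline, the new phase is a standard mark-from-roots worklist traversal.
A minimal sketch in pure Python, with invented names (mark_alive, roots,
children); the real pass in Python/gc_free_threading.c below uses
_PyObjectStack and each type's tp_traverse:

    # Sketch only -- all names here are invented for illustration.
    def mark_alive(roots, children):
        alive = set()                    # plays the role of _PyGC_BITS_ALIVE
        stack = list(roots)              # known roots, e.g. sys.__dict__
        while stack:
            op = stack.pop()
            if id(op) in alive:
                continue                 # already visited this object
            alive.add(id(op))
            stack.extend(children(op))   # analogous to calling tp_traverse
        return alive

For instance, passing gc.get_referents as the children function walks real
object graphs the same way.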
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index f7f44407494e51..d1023d9351086f 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -17,6 +17,17 @@
 #include "pydtrace.h"
 #include "pycore_uniqueid.h"      // _PyObject_MergeThreadLocalRefcounts()
 
+
+// enable the "mark alive" pass of GC
+#define GC_ENABLE_MARK_ALIVE 1
+
+// include additional roots in "mark alive" pass
+#define GC_MARK_ALIVE_EXTRA_ROOTS 1
+
+// include Python stacks as set of known roots
+#define GC_MARK_ALIVE_STACKS 1
+
+
 #ifdef Py_GIL_DISABLED
 
 typedef struct _gc_runtime_state GCState;
@@ -113,28 +124,66 @@ worklist_remove(struct worklist_iter *iter)
     iter->next = iter->ptr;
 }
 
+static inline int
+gc_has_bit(PyObject *op, uint8_t bit)
+{
+    return (op->ob_gc_bits & bit) != 0;
+}
+
+static inline void
+gc_set_bit(PyObject *op, uint8_t bit)
+{
+    op->ob_gc_bits |= bit;
+}
+
+static inline void
+gc_clear_bit(PyObject *op, uint8_t bit)
+{
+    op->ob_gc_bits &= ~bit;
+}
+
 static inline int
 gc_is_frozen(PyObject *op)
 {
-    return (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0;
+    return gc_has_bit(op, _PyGC_BITS_FROZEN);
 }
 
 static inline int
 gc_is_unreachable(PyObject *op)
 {
-    return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
+    return gc_has_bit(op, _PyGC_BITS_UNREACHABLE);
 }
 
-static void
+static inline void
 gc_set_unreachable(PyObject *op)
 {
-    op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
+    gc_set_bit(op, _PyGC_BITS_UNREACHABLE);
 }
 
-static void
+static inline void
 gc_clear_unreachable(PyObject *op)
 {
-    op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
+    gc_clear_bit(op, _PyGC_BITS_UNREACHABLE);
+}
+
+static inline int
+gc_is_alive(PyObject *op)
+{
+    return gc_has_bit(op, _PyGC_BITS_ALIVE);
+}
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static inline void
+gc_set_alive(PyObject *op)
+{
+    gc_set_bit(op, _PyGC_BITS_ALIVE);
+}
+#endif
+
+static inline void
+gc_clear_alive(PyObject *op)
+{
+    gc_clear_bit(op, _PyGC_BITS_ALIVE);
 }
 
 // Initialize the `ob_tid` field to zero if the object is not already
@@ -143,6 +192,7 @@ static void
 gc_maybe_init_refs(PyObject *op)
 {
     if (!gc_is_unreachable(op)) {
+        assert(!gc_is_alive(op));
         gc_set_unreachable(op);
         op->ob_tid = 0;
     }
@@ -264,9 +314,13 @@ static void
 gc_restore_refs(PyObject *op)
 {
     if (gc_is_unreachable(op)) {
+        assert(!gc_is_alive(op));
         gc_restore_tid(op);
         gc_clear_unreachable(op);
     }
+    else {
+        gc_clear_alive(op);
+    }
 }
 
 // Given a mimalloc memory block return the PyObject stored in it or NULL if
@@ -392,6 +446,119 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
     _Py_FOR_EACH_TSTATE_END(interp);
 }
 
+// Untrack objects that can never create reference cycles.
+// Return true if the object was untracked.
+static bool
+gc_maybe_untrack(PyObject *op)
+{
+    // Currently we only check for tuples containing only non-GC objects.  In
+    // theory we could check other immutable objects that contain references
+    // to non-GC objects.
+    if (PyTuple_CheckExact(op)) {
+        _PyTuple_MaybeUntrack(op);
+        if (!_PyObject_GC_IS_TRACKED(op)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+mark_alive_stack_push(PyObject *op, _PyObjectStack *stack)
+{
+    if (op == NULL) {
+        return 0;
+    }
+    if (!_PyObject_GC_IS_TRACKED(op)) {
+        return 0;
+    }
+    if (gc_is_alive(op)) {
+        return 0; // already visited this object
+    }
+    if (gc_maybe_untrack(op)) {
+        return 0; // was untracked, don't visit it
+    }
+
+    // Need to call tp_traverse on this object. Add to stack and mark it
+    // alive so we don't traverse it a second time.
+    gc_set_alive(op);
+    if (_PyObjectStack_Push(stack, op) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+static bool
+gc_clear_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+                    void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+    if (gc_is_alive(op)) {
+        gc_clear_alive(op);
+    }
+    return true;
+}
+
+static void
+gc_abort_mark_alive(PyInterpreterState *interp,
+                    struct collection_state *state,
+                    _PyObjectStack *stack)
+{
+    // We failed to allocate memory for "stack" while doing the "mark
+    // alive" phase.  In that case, free the object stack and make sure
+    // that no objects have the alive bit set.
+    _PyObjectStack_Clear(stack);
+    gc_visit_heaps(interp, &gc_clear_alive_bits, &state->base);
+}
+
+#ifdef GC_MARK_ALIVE_STACKS
+static int
+gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref)
+{
+    // Note: we MUST check that it is deferred before checking the rest.
+    // Otherwise we might read into invalid memory due to non-deferred references
+    // being dead already.
+    if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
+        PyObject *op = PyStackRef_AsPyObjectBorrow(stackref);
+        if (mark_alive_stack_push(op, stack) < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int
+gc_visit_thread_stacks_mark_alive(PyInterpreterState *interp, _PyObjectStack 
*stack)
+{
+    _Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
+        for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
+            PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
+            if (executable == NULL || !PyCode_Check(executable)) {
+                continue;
+            }
+
+            PyCodeObject *co = (PyCodeObject *)executable;
+            int max_stack = co->co_nlocalsplus + co->co_stacksize;
+            if (gc_visit_stackref_mark_alive(stack, f->f_executable) < 0) {
+                return -1;
+            }
+            for (int i = 0; i < max_stack; i++) {
+                if (gc_visit_stackref_mark_alive(stack, f->localsplus[i]) < 0) {
+                    return -1;
+                }
+            }
+        }
+    }
+    _Py_FOR_EACH_TSTATE_END(interp);
+    return 0;
+}
+#endif // GC_MARK_ALIVE_STACKS
+#endif // GC_ENABLE_MARK_ALIVE
+
 static void
 queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
 {
@@ -460,7 +627,8 @@ visit_decref(PyObject *op, void *arg)
 {
     if (_PyObject_GC_IS_TRACKED(op)
         && !_Py_IsImmortal(op)
-        && !gc_is_frozen(op))
+        && !gc_is_frozen(op)
+        && !gc_is_alive(op))
     {
         // If update_refs hasn't reached this object yet, mark it
         // as (tentatively) unreachable and initialize ob_tid to zero.
@@ -482,6 +650,10 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
         return true;
     }
 
+    if (gc_is_alive(op)) {
+        return true;
+    }
+
     // Exclude immortal objects from garbage collection
     if (_Py_IsImmortal(op)) {
         op->ob_tid = 0;
@@ -497,14 +669,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
     _PyObject_ASSERT(op, refcount >= 0);
 
     if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
-        // Untrack tuples and dicts as necessary in this pass, but not objects
-        // with zero refcount, which we will want to collect.
-        if (PyTuple_CheckExact(op)) {
-            _PyTuple_MaybeUntrack(op);
-            if (!_PyObject_GC_IS_TRACKED(op)) {
-                gc_restore_refs(op);
-                return true;
-            }
+        if (gc_maybe_untrack(op)) {
+            gc_restore_refs(op);
+            return true;
         }
     }
 
@@ -553,6 +720,21 @@ mark_reachable(PyObject *op)
 }
 
 #ifdef GC_DEBUG
+static bool
+validate_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+                   void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    _PyObject_ASSERT_WITH_MSG(op, !gc_is_alive(op),
+                              "object should not be marked as alive yet");
+
+    return true;
+}
+
 static bool
 validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area,
                    void *block, size_t block_size, void *args)
@@ -586,6 +768,11 @@ validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
         return true;
     }
 
+    if (gc_is_alive(op)) {
+        _PyObject_ASSERT(op, !gc_is_unreachable(op));
+        return true;
+    }
+
     _PyObject_ASSERT(op, gc_is_unreachable(op));
     _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
                                   "refcount is too small");
@@ -602,6 +789,10 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
         return true;
     }
 
+    if (gc_is_alive(op)) {
+        return true;
+    }
+
     _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
                                   "refcount is too small");
 
@@ -630,6 +821,7 @@ restore_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
     }
     gc_restore_tid(op);
     gc_clear_unreachable(op);
+    gc_clear_alive(op);
     return true;
 }
 
@@ -679,6 +871,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
 
     // object is reachable, restore `ob_tid`; we're done with these objects
     gc_restore_tid(op);
+    gc_clear_alive(op);
     state->long_lived_total++;
     return true;
 }
@@ -686,6 +879,89 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
 static int
 move_legacy_finalizer_reachable(struct collection_state *state);
 
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+propagate_alive_bits(_PyObjectStack *stack)
+{
+    for (;;) {
+        PyObject *op = _PyObjectStack_Pop(stack);
+        if (op == NULL) {
+            break;
+        }
+        assert(_PyObject_GC_IS_TRACKED(op));
+        assert(gc_is_alive(op));
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        if (traverse(op, (visitproc)&mark_alive_stack_push, stack) < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Using tp_traverse, mark everything reachable from known root objects
+// (which must be non-garbage) as alive (_PyGC_BITS_ALIVE is set).  In
+// most programs, this marks nearly all objects that are not actually
+// unreachable.
+//
+// Actually alive objects can be missed in this pass if they are alive
+// due to being referenced from an unknown root (e.g. an extension
+// module global), some tp_traverse methods are either missing or not
+// accurate, or objects that have been untracked.  Objects that are only
+// reachable from the aforementioned are also missed.
+//
+// If gc.freeze() is used, this pass is disabled since it is unlikely to
+// help much.  The next stages of cyclic GC will ignore objects with the
+// alive bit set.
+//
+// Returns -1 on failure (out of memory).
+static int
+mark_alive_from_roots(PyInterpreterState *interp,
+                      struct collection_state *state)
+{
+#ifdef GC_DEBUG
+    // Check that all objects don't have alive bit set
+    gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+    _PyObjectStack stack = { NULL };
+
+    #define STACK_PUSH(op) \
+        if (mark_alive_stack_push(op, &stack) < 0) { \
+            gc_abort_mark_alive(interp, state, &stack); \
+            return -1; \
+        }
+    STACK_PUSH(interp->sysdict);
+#ifdef GC_MARK_ALIVE_EXTRA_ROOTS
+    STACK_PUSH(interp->builtins);
+    STACK_PUSH(interp->dict);
+    struct types_state *types = &interp->types;
+    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
+        STACK_PUSH(types->builtins.initialized[i].tp_dict);
+        STACK_PUSH(types->builtins.initialized[i].tp_subclasses);
+    }
+    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
+        STACK_PUSH(types->for_extensions.initialized[i].tp_dict);
+        STACK_PUSH(types->for_extensions.initialized[i].tp_subclasses);
+    }
+#endif
+#ifdef GC_MARK_ALIVE_STACKS
+    if (gc_visit_thread_stacks_mark_alive(interp, &stack) < 0) {
+        gc_abort_mark_alive(interp, state, &stack);
+        return -1;
+    }
+#endif
+    #undef STACK_PUSH
+
+    // Use tp_traverse to find everything reachable from roots.
+    if (propagate_alive_bits(&stack) < 0) {
+        gc_abort_mark_alive(interp, state, &stack);
+        return -1;
+    }
+
+    return 0;
+}
+#endif // GC_ENABLE_MARK_ALIVE
+
+
 static int
 deduce_unreachable_heap(PyInterpreterState *interp,
                         struct collection_state *state)
@@ -1245,6 +1521,25 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
 
     process_delayed_frees(interp, state);
 
+    #ifdef GC_ENABLE_MARK_ALIVE
+    // If gc.freeze() was used, it seems likely that doing this "mark alive"
+    // pass will not be a performance win.  Typically the majority of alive
+    // objects will be marked as frozen and will be skipped anyhow, without
+    // doing this extra work.  Doing this pass also defeats one of the
+    // purposes of using freeze: avoiding writes to objects that are frozen.
+    // So, we just skip this if gc.freeze() was used.
+    if (!state->gcstate->freeze_active) {
+        // Mark objects reachable from known roots as "alive".  These will
+        // be ignored for rest of the GC pass.
+        int err = mark_alive_from_roots(interp, state);
+        if (err < 0) {
+            _PyEval_StartTheWorld(interp);
+            PyErr_NoMemory();
+            return;
+        }
+    }
+    #endif
+
     // Find unreachable objects
     int err = deduce_unreachable_heap(interp, state);
     if (err < 0) {
@@ -1253,6 +1548,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
         return;
     }
 
+#ifdef GC_DEBUG
+    // At this point, no object should have the alive bit set
+    gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+
     // Print debugging information.
     if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
         PyObject *op;
@@ -1564,6 +1864,8 @@ _PyGC_Freeze(PyInterpreterState *interp)
 {
     struct visitor_args args;
     _PyEval_StopTheWorld(interp);
+    GCState *gcstate = get_gc_state();
+    gcstate->freeze_active = true;
     gc_visit_heaps(interp, &visit_freeze, &args);
     _PyEval_StartTheWorld(interp);
 }
@@ -1574,7 +1876,7 @@ visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area,
 {
     PyObject *op = op_from_block(block, args, true);
     if (op != NULL) {
-        op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
+        gc_clear_bit(op, _PyGC_BITS_FROZEN);
     }
     return true;
 }
@@ -1584,6 +1886,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
 {
     struct visitor_args args;
     _PyEval_StopTheWorld(interp);
+    GCState *gcstate = get_gc_state();
+    gcstate->freeze_active = false;
     gc_visit_heaps(interp, &visit_unfreeze, &args);
     _PyEval_StartTheWorld(interp);
 }
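
The freeze_active flag added above means the mark-alive pass is skipped
entirely once gc.freeze() has been called.  A usage sketch of that
interaction (the pre-fork pattern is illustrative, not part of this commit):

    import gc

    gc.collect()   # collect existing garbage first
    gc.freeze()    # sets freeze_active: later collections skip the
                   # mark-alive pass and avoid writing to frozen objects
    # e.g. fork worker processes here to keep memory pages shared
    gc.unfreeze()  # clears freeze_active: mark-alive runs again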
