https://github.com/python/cpython/commit/e38967ed60778146050cc88dd16b70bfd867fde7
commit: e38967ed60778146050cc88dd16b70bfd867fde7
branch: main
author: Neil Schemenauer <[email protected]>
committer: nascheme <[email protected]>
date: 2025-12-11T12:30:56-08:00
summary:

gh-142531: Fix free-threaded GC performance regression (gh-142562)

If there are many untracked tuples, the GC will run too often, resulting
in poor performance.  The fix is to include untracked tuples in the
"long lived" object count. The number of frozen objects is also now
included since the free-threaded GC must scan those too.
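For a sense of the workload involved (the snippet below is illustrative only and is not part of the commit; the sizes are made up), a program that keeps many small tuples alive ends up with a heap full of untracked objects after the first collection:

    import gc

    # Tuples whose items are all atomic (ints, strings, None, ...) are
    # untracked by the first collection that examines them, since they can
    # never participate in a reference cycle.
    data = [(i,) for i in range(1_000_000)]
    gc.collect()
    assert not gc.is_tracked(data[0])

    # Before this fix, these objects were excluded from the free-threaded
    # GC's "long lived" count, so ordinary allocation churn re-triggered
    # full collections far too often, even though each collection still
    # had to scan every one of those tuples.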

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
M Lib/test/test_gc.py
M Modules/_testinternalcapi.c
M Python/gc_free_threading.c

diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index ec5df4d20e7085..6aa6361d5d0b92 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1231,6 +1231,24 @@ def test():
         assert_python_ok("-c", code_inside_function)
 
 
+    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_tuple_untrack_counts(self):
+        # This ensures that the free-threaded GC is counting untracked tuples
+        # in the "long_lived_total" count.  This is required to avoid
+        # performance issues from running the GC too frequently.  See
+        # GH-142531 as an example.
+        gc.collect()
+        count = _testinternalcapi.get_long_lived_total()
+        n = 20_000
+        tuples = [(x,) for x in range(n)]
+        gc.collect()
+        new_count = _testinternalcapi.get_long_lived_total()
+        self.assertFalse(gc.is_tracked(tuples[0]))
+        # Use n // 2 just in case some other objects were collected.
+        self.assertTrue(new_count - count > (n // 2))
+
+
 class IncrementalGCTests(unittest.TestCase):
     @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
     @requires_gil_enabled("Free threading does not support incremental GC")
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
new file mode 100644
index 00000000000000..15e03c1b9dd03f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
@@ -0,0 +1,5 @@
+Fix a free-threaded GC performance regression.  If there are many untracked
+tuples, the GC will run too often, resulting in poor performance.  The fix
+is to include untracked tuples in the "long lived" object count. The number
+of frozen objects is also now included since the free-threaded GC must
+scan those too.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 89e558b0fe8933..4140cd23ded95e 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
     }
     return PyLong_FromVoidPtr(bc);
 }
+
+static PyObject *
+get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
+}
+
 #endif
 
 static PyObject *
@@ -2590,6 +2597,7 @@ static PyMethodDef module_functions[] = {
     {"py_thread_id", get_py_thread_id, METH_NOARGS},
     {"get_tlbc", get_tlbc, METH_O, NULL},
     {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
+    {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
 #endif
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index e672e870db2f27..7ba94d5381b72e 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -375,6 +375,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
     return op;
 }
 
+// As above but returns untracked and frozen objects as well.
+static PyObject *
+op_from_block_all_gc(void *block, void *arg)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char*)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    return op;
+}
+
 static int
 gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
                          struct visitor_args *arg)
@@ -1186,12 +1199,20 @@ static bool
 scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
                   void *block, size_t block_size, void *args)
 {
-    PyObject *op = op_from_block(block, args, false);
+    PyObject *op = op_from_block_all_gc(block, args);
     if (op == NULL) {
         return true;
     }
-
     struct collection_state *state = (struct collection_state *)args;
+    // The free-threaded GC cost is proportional to the number of objects in
+    // the mimalloc GC heap and so we should include the counts for untracked
+    // and frozen objects as well.  This is especially important if many
+    // tuples have been untracked.
+    state->long_lived_total++;
+    if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
+        return true;
+    }
+
     if (gc_is_unreachable(op)) {
         // Disable deferred refcounting for unreachable objects so that they
         // are collected immediately after finalization.
@@ -1209,6 +1230,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
         else {
             worklist_push(&state->unreachable, op);
         }
+        // It is possible this object will be resurrected but
+        // for now we assume it will be deallocated.
+        state->long_lived_total--;
         return true;
     }
 
@@ -1222,7 +1246,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     // object is reachable, restore `ob_tid`; we're done with these objects
     gc_restore_tid(op);
     gc_clear_alive(op);
-    state->long_lived_total++;
     return true;
 }
 
@@ -1891,6 +1914,7 @@ handle_resurrected_objects(struct collection_state *state)
                 _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
                 worklist_remove(&iter);
                 merge_refcount(op, -1);  // remove worklist reference
+                state->long_lived_total++;
             }
         }
     }
@@ -2303,9 +2327,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
         }
     }
 
-    // Record the number of live GC objects
-    interp->gc.long_lived_total = state->long_lived_total;
-
     // Find weakref callbacks we will honor (but do not call them).
     find_weakref_callbacks(state);
     _PyEval_StartTheWorld(interp);
@@ -2326,8 +2347,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     if (err == 0) {
         clear_weakrefs(state);
     }
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
     _PyEval_StartTheWorld(interp);
 
+
     if (err < 0) {
         cleanup_worklist(&state->unreachable);
         cleanup_worklist(&state->legacy_finalizers);

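A rough model of why the count matters for scheduling (the should_collect() helper and the numeric thresholds below are assumptions for illustration, not taken from the CPython sources): the free-threaded collector's cost is proportional to the whole mimalloc GC heap, so a new collection only pays off once the allocations since the last one are a meaningful fraction of the surviving heap. If long_lived_total undercounts that heap by leaving out untracked tuples and frozen objects, the trigger fires far more often than intended:

    # Illustrative model only; the real heuristic lives in
    # Python/gc_free_threading.c and uses different names and constants.
    def should_collect(allocs_since_last_gc, long_lived_total,
                       min_allocs=2000, proportion=0.25):
        return (allocs_since_last_gc > min_allocs
                and allocs_since_last_gc > long_lived_total * proportion)

    # Heap contains ~1,000,000 untracked tuples plus a little tracked data.
    print(should_collect(10_000, long_lived_total=500))        # undercounted -> collects on every burst
    print(should_collect(10_000, long_lived_total=1_000_500))  # counted -> waits until much more is allocated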