https://github.com/python/cpython/commit/227b9d326ec7eba35942a4eb451c7db244a33a6c
commit: 227b9d326ec7eba35942a4eb451c7db244a33a6c
branch: main
author: Brandt Bucher <[email protected]>
committer: brandtbucher <[email protected]>
date: 2025-11-22T21:59:14Z
summary:

GH-140638: Add a GC "candidates" stat (GH-141814)

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst
M Doc/library/gc.rst
M Include/internal/pycore_interp_structs.h
M Lib/test/test_gc.py
M Modules/gcmodule.c
M Python/gc.c
M Python/gc_free_threading.c

diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 8e6f2342a2869a..79a8c38626f002 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -110,13 +110,16 @@ The :mod:`gc` module provides the following functions:
      to be uncollectable (and were therefore moved to the :data:`garbage`
      list) inside this generation;
 
+   * ``candidates`` is the total number of objects in this generation which were
+     considered for collection and traversed;
+
    * ``duration`` is the total time in seconds spent in collections for this
      generation.
 
    .. versionadded:: 3.4
 
    .. versionchanged:: next
-      Add ``duration``.
+      Add ``duration`` and ``candidates``.
 
 
 .. function:: set_threshold(threshold0, [threshold1, [threshold2]])
@@ -319,6 +322,9 @@ values but should not rebind them):
       "uncollectable": When *phase* is "stop", the number of objects
       that could not be collected and were put in :data:`garbage`.
 
+      "candidates": When *phase* is "stop", the total number of objects in this
+      generation which were considered for collection and traversed.
+
       "duration": When *phase* is "stop", the time in seconds spent in the
       collection.
 
@@ -335,7 +341,7 @@ values but should not rebind them):
    .. versionadded:: 3.3
 
    .. versionchanged:: next
-      Add "duration".
+      Add "duration" and "candidates".
 
 
 The following constants are provided for use with :func:`set_debug`:
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index d9f5d444a2dc07..6b3d5711b92971 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -179,6 +179,8 @@ struct gc_collection_stats {
     Py_ssize_t collected;
     /* total number of uncollectable objects (put into gc.garbage) */
     Py_ssize_t uncollectable;
+    // Total number of objects considered for collection and traversed:
+    Py_ssize_t candidates;
     // Duration of the collection in seconds:
     double duration;
 };
@@ -191,6 +193,8 @@ struct gc_generation_stats {
     Py_ssize_t collected;
     /* total number of uncollectable objects (put into gc.garbage) */
     Py_ssize_t uncollectable;
+    // Total number of objects considered for collection and traversed:
+    Py_ssize_t candidates;
     // Duration of the collection in seconds:
     double duration;
 };
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index e65da0f61d944f..ec5df4d20e7085 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -846,11 +846,14 @@ def test_get_stats(self):
         self.assertEqual(len(stats), 3)
         for st in stats:
             self.assertIsInstance(st, dict)
-            self.assertEqual(set(st),
-                             {"collected", "collections", "uncollectable", "duration"})
+            self.assertEqual(
+                set(st),
+                {"collected", "collections", "uncollectable", "candidates", "duration"}
+            )
             self.assertGreaterEqual(st["collected"], 0)
             self.assertGreaterEqual(st["collections"], 0)
             self.assertGreaterEqual(st["uncollectable"], 0)
+            self.assertGreaterEqual(st["candidates"], 0)
             self.assertGreaterEqual(st["duration"], 0)
         # Check that collection counts are incremented correctly
         if gc.isenabled():
@@ -865,7 +868,7 @@ def test_get_stats(self):
         self.assertGreater(new[0]["duration"], old[0]["duration"])
         self.assertEqual(new[1]["duration"], old[1]["duration"])
         self.assertEqual(new[2]["duration"], old[2]["duration"])
-        for stat in ["collected", "uncollectable"]:
+        for stat in ["collected", "uncollectable", "candidates"]:
             self.assertGreaterEqual(new[0][stat], old[0][stat])
             self.assertEqual(new[1][stat], old[1][stat])
             self.assertEqual(new[2][stat], old[2][stat])
@@ -877,7 +880,7 @@ def test_get_stats(self):
         self.assertEqual(new[0]["duration"], old[0]["duration"])
         self.assertEqual(new[1]["duration"], old[1]["duration"])
         self.assertGreater(new[2]["duration"], old[2]["duration"])
-        for stat in ["collected", "uncollectable"]:
+        for stat in ["collected", "uncollectable", "candidates"]:
             self.assertEqual(new[0][stat], old[0][stat])
             self.assertEqual(new[1][stat], old[1][stat])
             self.assertGreaterEqual(new[2][stat], old[2][stat])
@@ -1316,6 +1319,7 @@ def test_collect(self):
             self.assertIn("generation", info)
             self.assertIn("collected", info)
             self.assertIn("uncollectable", info)
+            self.assertIn("candidates", info)
             self.assertIn("duration", info)
 
     def test_collect_generation(self):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst
new file mode 100644
index 00000000000000..e3af941523cb75
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst
@@ -0,0 +1,2 @@
+Expose a ``"candidates"`` stat in :func:`gc.get_stats` and
+:data:`gc.callbacks`.
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 6a44d8a9d17aea..4c286f5c12cc7d 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -358,10 +358,11 @@ gc_get_stats_impl(PyObject *module)
     for (i = 0; i < NUM_GENERATIONS; i++) {
         PyObject *dict;
         st = &stats[i];
-        dict = Py_BuildValue("{snsnsnsd}",
+        dict = Py_BuildValue("{snsnsnsnsd}",
                              "collections", st->collections,
                              "collected", st->collected,
                              "uncollectable", st->uncollectable,
+                             "candidates", st->candidates,
                              "duration", st->duration
                             );
         if (dict == NULL)
diff --git a/Python/gc.c b/Python/gc.c
index 7e3e93e6e01be2..d067a6144b0763 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head)
 /* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
  * PREV_MASK_COLLECTING bit is set for all objects in containers.
  */
-static void
+static Py_ssize_t
 update_refs(PyGC_Head *containers)
 {
     PyGC_Head *next;
     PyGC_Head *gc = GC_NEXT(containers);
+    Py_ssize_t candidates = 0;
 
     while (gc != containers) {
         next = GC_NEXT(gc);
@@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers)
          */
         _PyObject_ASSERT(op, gc_get_refs(gc) != 0);
         gc = next;
+        candidates++;
     }
+    return candidates;
 }
 
 /* A traversal callback for subtract_refs. */
@@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the
 flag is cleared (for example, by using 'clear_unreachable_mask' function or
 by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
 list and we can not use most gc_list_* functions for it. */
-static inline void
+static inline Py_ssize_t
 deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
     validate_list(base, collecting_clear_unreachable_clear);
     /* Using ob_refcnt and gc_refs, calculate which objects in the
@@ -1248,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
      * refcount greater than 0 when all the references within the
      * set are taken into account).
      */
-    update_refs(base);  // gc_prev is used for gc_refs
+    Py_ssize_t candidates = update_refs(base);  // gc_prev is used for gc_refs
     subtract_refs(base);
 
     /* Leave everything reachable from outside base in base, and move
@@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
     move_unreachable(base, unreachable);  // gc_prev is pointer again
     validate_list(base, collecting_clear_unreachable_clear);
     validate_list(unreachable, collecting_set_unreachable_set);
+    return candidates;
 }
 
 /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
@@ -1366,6 +1370,7 @@ add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
     gcstate->generation_stats[gen].duration += stats->duration;
     gcstate->generation_stats[gen].collected += stats->collected;
     gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
+    gcstate->generation_stats[gen].candidates += stats->candidates;
     gcstate->generation_stats[gen].collections += 1;
 }
 
@@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
         Py_ssize_t objects_marked = mark_at_start(tstate);
         GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
         gcstate->work_to_do -= objects_marked;
+        stats->candidates += objects_marked;
         validate_spaces(gcstate);
         return;
     }
@@ -1754,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate,
     assert(!_PyErr_Occurred(tstate));
 
     gc_list_init(&unreachable);
-    deduce_unreachable(from, &unreachable);
+    stats->candidates = deduce_unreachable(from, &unreachable);
     validate_consistent_old_space(from);
     untrack_tuples(from);
 
@@ -1844,10 +1850,11 @@ do_gc_callback(GCState *gcstate, const char *phase,
     assert(PyList_CheckExact(gcstate->callbacks));
     PyObject *info = NULL;
     if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
-        info = Py_BuildValue("{sisnsnsd}",
+        info = Py_BuildValue("{sisnsnsnsd}",
             "generation", generation,
             "collected", stats->collected,
             "uncollectable", stats->uncollectable,
+            "candidates", stats->candidates,
             "duration", stats->duration);
         if (info == NULL) {
             PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 9f424db8894524..1717603b947f90 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -100,6 +100,7 @@ struct collection_state {
     int skip_deferred_objects;
     Py_ssize_t collected;
     Py_ssize_t uncollectable;
+    Py_ssize_t candidates;
     Py_ssize_t long_lived_total;
     struct worklist unreachable;
     struct worklist legacy_finalizers;
@@ -975,15 +976,12 @@ static bool
 update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
             void *block, size_t block_size, void *args)
 {
+    struct collection_state *state = (struct collection_state *)args;
     PyObject *op = op_from_block(block, args, false);
     if (op == NULL) {
         return true;
     }
 
-    if (gc_is_alive(op)) {
-        return true;
-    }
-
     // Exclude immortal objects from garbage collection
     if (_Py_IsImmortal(op)) {
         op->ob_tid = 0;
@@ -991,6 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
         gc_clear_unreachable(op);
         return true;
     }
+    // Marked objects count as candidates, immortals don't:
+    state->candidates++;
+    if (gc_is_alive(op)) {
+        return true;
+    }
 
     Py_ssize_t refcount = Py_REFCNT(op);
     if (_PyObject_HasDeferredRefcount(op)) {
@@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state)
 static void
 invoke_gc_callback(PyThreadState *tstate, const char *phase,
                    int generation, Py_ssize_t collected,
-                   Py_ssize_t uncollectable, double duration)
+                   Py_ssize_t uncollectable, Py_ssize_t candidates,
+                   double duration)
 {
     assert(!_PyErr_Occurred(tstate));
 
@@ -1925,10 +1929,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
     assert(PyList_CheckExact(gcstate->callbacks));
     PyObject *info = NULL;
     if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
-        info = Py_BuildValue("{sisnsnsd}",
+        info = Py_BuildValue("{sisnsnsnsd}",
             "generation", generation,
             "collected", collected,
             "uncollectable", uncollectable,
+            "candidates", candidates,
             "duration", duration);
         if (info == NULL) {
             PyErr_FormatUnraisable("Exception ignored while "
@@ -2372,7 +2377,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     GC_STAT_ADD(generation, collections, 1);
 
     if (reason != _Py_GC_REASON_SHUTDOWN) {
-        invoke_gc_callback(tstate, "start", generation, 0, 0, 0);
+        invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0);
     }
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
@@ -2427,6 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     stats->collected += m;
     stats->uncollectable += n;
     stats->duration += duration;
+    stats->candidates += state.candidates;
 
     GC_STAT_ADD(generation, objects_collected, m);
 #ifdef Py_STATS
@@ -2445,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     }
 
     if (reason != _Py_GC_REASON_SHUTDOWN) {
-        invoke_gc_callback(tstate, "stop", generation, m, n, duration);
+        invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration);
     }
 
     assert(!_PyErr_Occurred(tstate));

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to