https://github.com/python/cpython/commit/e89568f0cbcfd055419cdde5ac1248cb48055f90
commit: e89568f0cbcfd055419cdde5ac1248cb48055f90
branch: main
author: Sergey Miryanov <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-05-04T22:14:45+01:00
summary:

GH-148726: Add heap_size to generational GC (#149195)

files:
M Include/internal/pycore_gc.h
M Include/internal/pycore_interp_structs.h
M Lib/test/test_gc.py
M Lib/test/test_gc_stats.py
M Modules/_remote_debugging/clinic/module.c.h
M Modules/_remote_debugging/gc_stats.c
M Modules/_remote_debugging/module.c
M Modules/_testinternalcapi.c
M Python/gc.c

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index e105677cd2e674..bfe52f42f1141c 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -223,12 +223,14 @@ static inline void _PyObject_GC_TRACK(
                           "object is in generation which is garbage collected",
                           filename, lineno, __func__);
 
-    PyGC_Head *generation0 = _PyInterpreterState_GET()->gc.generation0;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    PyGC_Head *generation0 = gcstate->generation0;
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
     _PyGCHead_SET_NEXT(gc, generation0);
     generation0->_gc_prev = (uintptr_t)gc;
+    gcstate->heap_size++;
 #endif
 }
 
@@ -263,6 +265,8 @@ static inline void _PyObject_GC_UNTRACK(
     _PyGCHead_SET_PREV(next, prev);
     gc->_gc_next = 0;
     gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    gcstate->heap_size--;
 #endif
 }
 
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 86f018e328656e..2d04c173e85abe 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -191,6 +191,8 @@ struct gc_generation_stats {
     Py_ssize_t candidates;
     // Total duration of the collection in seconds:
     double duration;
+    /* heap_size on the start of the collection */
+    Py_ssize_t heap_size;
 };
 
 #ifdef Py_GIL_DISABLED
@@ -226,7 +228,6 @@ struct _gc_runtime_state {
     /* linked lists of container objects */
 #ifndef Py_GIL_DISABLED
     struct gc_generation generations[NUM_GENERATIONS];
-    PyGC_Head *generation0;
 #else
     struct gc_generation young;
     struct gc_generation old[2];
@@ -244,6 +245,9 @@ struct _gc_runtime_state {
     /* a list of callbacks to be invoked when collection is performed */
     PyObject *callbacks;
 
+    /* The number of live objects. */
+    Py_ssize_t heap_size;
+
     /* This is the number of objects that survived the last full
        collection. It approximates the number of long lived objects
        tracked by the GC.
@@ -269,6 +273,8 @@ struct _gc_runtime_state {
 
     /* Mutex held for gc_should_collect_mem_usage(). */
     PyMutex mutex;
+#else
+    PyGC_Head *generation0;
 #endif
 };
 
@@ -278,7 +284,8 @@ struct _gc_runtime_state {
         { .threshold = 2000, }, \
         { .threshold = 10, }, \
         { .threshold = 10, }, \
-    },
+    }, \
+    .heap_size = 0,
 #else
 #define GC_GENERATION_INIT \
     .young = { .threshold = 2000, }, \
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 88d265cbc21709..3fc084ea6e9c6e 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1288,6 +1288,15 @@ def test_tuple_untrack_counts(self):
         # Use n // 2 just in case some other objects were collected.
         self.assertTrue(new_count - count > (n // 2))
 
+    @requires_gil_enabled('need generational GC')
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_heap_size(self):
+        count = _testinternalcapi.get_tracked_heap_size()
+        l = []
+        self.assertEqual(count + 1, _testinternalcapi.get_tracked_heap_size())
+        del l
+        self.assertEqual(count, _testinternalcapi.get_tracked_heap_size())
+
 
 class GCCallbackTests(unittest.TestCase):
     def setUp(self):
diff --git a/Lib/test/test_gc_stats.py b/Lib/test/test_gc_stats.py
index 59365ad45b32c9..bd75924397e76e 100644
--- a/Lib/test/test_gc_stats.py
+++ b/Lib/test/test_gc_stats.py
@@ -22,7 +22,7 @@
 
 GC_STATS_FIELDS = (
     "gen", "iid", "ts_start", "ts_stop", "collections", "collected",
-    "uncollectable", "candidates", "duration")
+    "uncollectable", "candidates", "heap_size", "duration")
 
 
 def get_interpreter_identifiers(gc_stats) -> tuple[int,...]:
diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h
index 179a7b97dd4e2f..1133db808efaec 100644
--- a/Modules/_remote_debugging/clinic/module.c.h
+++ b/Modules/_remote_debugging/clinic/module.c.h
@@ -601,6 +601,7 @@ PyDoc_STRVAR(_remote_debugging_GCMonitor_get_gc_stats__doc__,
 "        - collected: Total number of collected objects.\n"
 "        - uncollectable: Total number of uncollectable objects.\n"
 "        - candidates: Total objects considered and traversed.\n"
+"        - heap_size: number of live objects.\n"
 "        - duration: Total collection time, in seconds.\n"
 "\n"
 "Raises:\n"
@@ -1563,4 +1564,4 @@ _remote_debugging_get_gc_stats(PyObject *module, PyObject *const *args, Py_ssize
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=1151e58683dab9f4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=36674f4cb8a653f3 input=a9049054013a1b77]*/
diff --git a/Modules/_remote_debugging/gc_stats.c b/Modules/_remote_debugging/gc_stats.c
index 852dc866153192..d5d05edb8ecf5e 100644
--- a/Modules/_remote_debugging/gc_stats.c
+++ b/Modules/_remote_debugging/gc_stats.c
@@ -53,6 +53,7 @@ read_gc_stats(struct gc_stats *stats, int64_t iid, PyObject *result,
             SET_FIELD(PyLong_FromSsize_t, items->collected);
             SET_FIELD(PyLong_FromSsize_t, items->uncollectable);
             SET_FIELD(PyLong_FromSsize_t, items->candidates);
+            SET_FIELD(PyLong_FromSsize_t, items->heap_size);
 
             SET_FIELD(PyFloat_FromDouble, items->duration);
 
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
index c840c59971c478..c694e587e7cccb 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -143,6 +143,7 @@ static PyStructSequence_Field GCStatsInfo_fields[] = {
     {"collected", "Total number of collected objects"},
     {"uncollectable", "Total number of uncollectable objects"},
     {"candidates", "Total objects considered and traversed"},
+    {"heap_size", "Number of live objects"},
     {"duration", "Total collection time, in seconds"},
     {NULL}
 };
@@ -151,7 +152,7 @@ PyStructSequence_Desc GCStatsInfo_desc = {
     "_remote_debugging.GCStatsInfo",
     "Information about a garbage collector stats sample",
     GCStatsInfo_fields,
-    9
+    10
 };
 
 /* ============================================================================
@@ -1225,6 +1226,7 @@ Returns a list of GCStatsInfo objects with GC statistics data.
         - collected: Total number of collected objects.
         - uncollectable: Total number of uncollectable objects.
         - candidates: Total objects considered and traversed.
+        - heap_size: number of live objects.
         - duration: Total collection time, in seconds.
 
 Raises:
@@ -1235,7 +1237,7 @@ Returns a list of GCStatsInfo objects with GC statistics data.
 static PyObject *
 _remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self,
                                               int all_interpreters)
-/*[clinic end generated code: output=f73f365725224f7a input=09e647719c65f9e4]*/
+/*[clinic end generated code: output=f73f365725224f7a input=12f7c1a288cf2741]*/
 {
     RemoteDebuggingState *st = RemoteDebugging_GetStateFromType(Py_TYPE(self));
     return get_gc_stats(&self->offsets, all_interpreters, st->GCStatsInfo_Type);
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index a07675bb66d8cc..d85b9eb5f7da89 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2731,8 +2731,7 @@ has_deferred_refcount(PyObject *self, PyObject *op)
 static PyObject *
 get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
-    // Generational GC doesn't track heap_size, return -1.
-    return PyLong_FromInt64(-1);
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size);
 }
 
 static PyObject *
diff --git a/Python/gc.c b/Python/gc.c
index 134da107e1b61d..54ac1b089e503d 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1405,13 +1405,13 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
     memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
 
     cur_stats->ts_start = stats->ts_start;
-
     cur_stats->collections += 1;
     cur_stats->collected += stats->collected;
     cur_stats->uncollectable += stats->uncollectable;
     cur_stats->candidates += stats->candidates;
 
     cur_stats->duration += stats->duration;
+    cur_stats->heap_size = stats->heap_size;
     /* Publish ts_stop last so remote readers do not select a partially
        updated stats record as the latest collection. */
     cur_stats->ts_stop = stats->ts_stop;
@@ -1471,6 +1471,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         invoke_gc_callback(tstate, "start", generation, &stats);
     }
 
+    stats.heap_size = gcstate->heap_size;
     // ignore error: don't interrupt the GC if reading the clock fails
     (void)PyTime_PerfCounterRaw(&stats.ts_start);
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
@@ -2097,6 +2098,8 @@ PyObject_GC_Del(void *op)
     PyGC_Head *g = AS_GC(op);
     if (_PyObject_GC_IS_TRACKED(op)) {
         gc_list_remove(g);
+        GCState *gcstate = get_gc_state();
+        gcstate->heap_size--;
 #ifdef Py_DEBUG
         PyObject *exc = PyErr_GetRaisedException();
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to