https://github.com/python/cpython/commit/e89568f0cbcfd055419cdde5ac1248cb48055f90
commit: e89568f0cbcfd055419cdde5ac1248cb48055f90
branch: main
author: Sergey Miryanov <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-05-04T22:14:45+01:00
summary:
GH-148726: Add heap_size to generational GC (#149195)
files:
M Include/internal/pycore_gc.h
M Include/internal/pycore_interp_structs.h
M Lib/test/test_gc.py
M Lib/test/test_gc_stats.py
M Modules/_remote_debugging/clinic/module.c.h
M Modules/_remote_debugging/gc_stats.c
M Modules/_remote_debugging/module.c
M Modules/_testinternalcapi.c
M Python/gc.c
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index e105677cd2e674..bfe52f42f1141c 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -223,12 +223,14 @@ static inline void _PyObject_GC_TRACK(
"object is in generation which is garbage collected",
filename, lineno, __func__);
- PyGC_Head *generation0 = _PyInterpreterState_GET()->gc.generation0;
+ struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+ PyGC_Head *generation0 = gcstate->generation0;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
_PyGCHead_SET_NEXT(gc, generation0);
generation0->_gc_prev = (uintptr_t)gc;
+ gcstate->heap_size++;
#endif
}
@@ -263,6 +265,8 @@ static inline void _PyObject_GC_UNTRACK(
_PyGCHead_SET_PREV(next, prev);
gc->_gc_next = 0;
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+ struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+ gcstate->heap_size--;
#endif
}
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 86f018e328656e..2d04c173e85abe 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -191,6 +191,8 @@ struct gc_generation_stats {
Py_ssize_t candidates;
// Total duration of the collection in seconds:
double duration;
+ /* heap_size on the start of the collection */
+ Py_ssize_t heap_size;
};
#ifdef Py_GIL_DISABLED
@@ -226,7 +228,6 @@ struct _gc_runtime_state {
/* linked lists of container objects */
#ifndef Py_GIL_DISABLED
struct gc_generation generations[NUM_GENERATIONS];
- PyGC_Head *generation0;
#else
struct gc_generation young;
struct gc_generation old[2];
@@ -244,6 +245,9 @@ struct _gc_runtime_state {
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;
+ /* The number of live objects. */
+ Py_ssize_t heap_size;
+
/* This is the number of objects that survived the last full
collection. It approximates the number of long lived objects
tracked by the GC.
@@ -269,6 +273,8 @@ struct _gc_runtime_state {
/* Mutex held for gc_should_collect_mem_usage(). */
PyMutex mutex;
+#else
+ PyGC_Head *generation0;
#endif
};
@@ -278,7 +284,8 @@ struct _gc_runtime_state {
{ .threshold = 2000, }, \
{ .threshold = 10, }, \
{ .threshold = 10, }, \
- },
+ }, \
+ .heap_size = 0,
#else
#define GC_GENERATION_INIT \
.young = { .threshold = 2000, }, \
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 88d265cbc21709..3fc084ea6e9c6e 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1288,6 +1288,15 @@ def test_tuple_untrack_counts(self):
# Use n // 2 just in case some other objects were collected.
self.assertTrue(new_count - count > (n // 2))
+ @requires_gil_enabled('need generational GC')
+ @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+ def test_heap_size(self):
+ count = _testinternalcapi.get_tracked_heap_size()
+ l = []
+ self.assertEqual(count + 1, _testinternalcapi.get_tracked_heap_size())
+ del l
+ self.assertEqual(count, _testinternalcapi.get_tracked_heap_size())
+
class GCCallbackTests(unittest.TestCase):
def setUp(self):
diff --git a/Lib/test/test_gc_stats.py b/Lib/test/test_gc_stats.py
index 59365ad45b32c9..bd75924397e76e 100644
--- a/Lib/test/test_gc_stats.py
+++ b/Lib/test/test_gc_stats.py
@@ -22,7 +22,7 @@
GC_STATS_FIELDS = (
"gen", "iid", "ts_start", "ts_stop", "collections", "collected",
- "uncollectable", "candidates", "duration")
+ "uncollectable", "candidates", "heap_size", "duration")
def get_interpreter_identifiers(gc_stats) -> tuple[int,...]:
diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h
index 179a7b97dd4e2f..1133db808efaec 100644
--- a/Modules/_remote_debugging/clinic/module.c.h
+++ b/Modules/_remote_debugging/clinic/module.c.h
@@ -601,6 +601,7 @@
PyDoc_STRVAR(_remote_debugging_GCMonitor_get_gc_stats__doc__,
" - collected: Total number of collected objects.\n"
" - uncollectable: Total number of uncollectable objects.\n"
" - candidates: Total objects considered and traversed.\n"
+" - heap_size: number of live objects.\n"
" - duration: Total collection time, in seconds.\n"
"\n"
"Raises:\n"
@@ -1563,4 +1564,4 @@ _remote_debugging_get_gc_stats(PyObject *module, PyObject *const *args, Py_ssize
exit:
return return_value;
}
-/*[clinic end generated code: output=1151e58683dab9f4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=36674f4cb8a653f3 input=a9049054013a1b77]*/
diff --git a/Modules/_remote_debugging/gc_stats.c b/Modules/_remote_debugging/gc_stats.c
index 852dc866153192..d5d05edb8ecf5e 100644
--- a/Modules/_remote_debugging/gc_stats.c
+++ b/Modules/_remote_debugging/gc_stats.c
@@ -53,6 +53,7 @@ read_gc_stats(struct gc_stats *stats, int64_t iid, PyObject *result,
SET_FIELD(PyLong_FromSsize_t, items->collected);
SET_FIELD(PyLong_FromSsize_t, items->uncollectable);
SET_FIELD(PyLong_FromSsize_t, items->candidates);
+ SET_FIELD(PyLong_FromSsize_t, items->heap_size);
SET_FIELD(PyFloat_FromDouble, items->duration);
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
index c840c59971c478..c694e587e7cccb 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -143,6 +143,7 @@ static PyStructSequence_Field GCStatsInfo_fields[] = {
{"collected", "Total number of collected objects"},
{"uncollectable", "Total number of uncollectable objects"},
{"candidates", "Total objects considered and traversed"},
+ {"heap_size", "Number of live objects"},
{"duration", "Total collection time, in seconds"},
{NULL}
};
@@ -151,7 +152,7 @@ PyStructSequence_Desc GCStatsInfo_desc = {
"_remote_debugging.GCStatsInfo",
"Information about a garbage collector stats sample",
GCStatsInfo_fields,
- 9
+ 10
};
/* ============================================================================
@@ -1225,6 +1226,7 @@ Returns a list of GCStatsInfo objects with GC statistics data.
- collected: Total number of collected objects.
- uncollectable: Total number of uncollectable objects.
- candidates: Total objects considered and traversed.
+ - heap_size: number of live objects.
- duration: Total collection time, in seconds.
Raises:
@@ -1235,7 +1237,7 @@ Returns a list of GCStatsInfo objects with GC statistics data.
static PyObject *
_remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self,
int all_interpreters)
-/*[clinic end generated code: output=f73f365725224f7a input=09e647719c65f9e4]*/
+/*[clinic end generated code: output=f73f365725224f7a input=12f7c1a288cf2741]*/
{
RemoteDebuggingState *st = RemoteDebugging_GetStateFromType(Py_TYPE(self));
return get_gc_stats(&self->offsets, all_interpreters,
st->GCStatsInfo_Type);
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index a07675bb66d8cc..d85b9eb5f7da89 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2731,8 +2731,7 @@ has_deferred_refcount(PyObject *self, PyObject *op)
static PyObject *
get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
{
- // Generational GC doesn't track heap_size, return -1.
- return PyLong_FromInt64(-1);
+ return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size);
}
static PyObject *
diff --git a/Python/gc.c b/Python/gc.c
index 134da107e1b61d..54ac1b089e503d 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1405,13 +1405,13 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
cur_stats->ts_start = stats->ts_start;
-
cur_stats->collections += 1;
cur_stats->collected += stats->collected;
cur_stats->uncollectable += stats->uncollectable;
cur_stats->candidates += stats->candidates;
cur_stats->duration += stats->duration;
+ cur_stats->heap_size = stats->heap_size;
/* Publish ts_stop last so remote readers do not select a partially
updated stats record as the latest collection. */
cur_stats->ts_stop = stats->ts_stop;
@@ -1471,6 +1471,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
invoke_gc_callback(tstate, "start", generation, &stats);
}
+ stats.heap_size = gcstate->heap_size;
// ignore error: don't interrupt the GC if reading the clock fails
(void)PyTime_PerfCounterRaw(&stats.ts_start);
if (gcstate->debug & _PyGC_DEBUG_STATS) {
@@ -2097,6 +2098,8 @@ PyObject_GC_Del(void *op)
PyGC_Head *g = AS_GC(op);
if (_PyObject_GC_IS_TRACKED(op)) {
gc_list_remove(g);
+ GCState *gcstate = get_gc_state();
+ gcstate->heap_size--;
#ifdef Py_DEBUG
PyObject *exc = PyErr_GetRaisedException();
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]