https://github.com/python/cpython/commit/e28477f214276db941e715eebc8cdfb96c1207d9
commit: e28477f214276db941e715eebc8cdfb96c1207d9
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2024-03-22T18:43:25Z
summary:

GH-117108: Change the size of the GC increment to about 1% of the total heap size. (GH-117120)

files:
A Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst
M Include/internal/pycore_gc.h
M Lib/test/test_gc.py
M Modules/gcmodule.c
M Python/gc.c
M Python/gc_free_threading.c

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 9d66e62ba8b5e3..e729616936f03b 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -282,6 +282,7 @@ struct _gc_runtime_state {
     /* a list of callbacks to be invoked when collection is performed */
     PyObject *callbacks;
 
+    Py_ssize_t heap_size;
     Py_ssize_t work_to_do;
     /* Which of the old spaces is the visited space */
     int visited_space;
@@ -321,7 +322,7 @@ extern void _PyGC_Unfreeze(PyInterpreterState *interp);
 /* Number of frozen objects */
 extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp);
 
-extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation);
+extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation);
 extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);
 
 // Functions to clear types free lists
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index ce01916bcabe4f..57acbac5859e7f 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1058,7 +1058,19 @@ class Z:
         callback.assert_not_called()
         gc.enable()
 
+
+class IncrementalGCTests(unittest.TestCase):
+
+    def setUp(self):
+        # Reenable GC as it is disabled module-wide
+        gc.enable()
+
+    def tearDown(self):
+        gc.disable()
+
     @unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC")
+    # Use small increments to emulate longer running process in a shorter time
+    @gc_threshold(200, 10)
     def test_incremental_gc_handles_fast_cycle_creation(self):
 
         class LinkedList:
@@ -1080,28 +1092,31 @@ def make_ll(depth):
                 head = LinkedList(head, head.prev)
             return head
 
-        head = make_ll(10000)
-        count = 10000
+        head = make_ll(1000)
+        count = 1000
 
-        # We expect the counts to go negative eventually
-        # as there will some objects we aren't counting,
-        # e.g. the gc stats dicts. The test merely checks
-        # that the counts don't grow.
+        # There will be some objects we aren't counting,
+        # e.g. the gc stats dicts. This test checks
+        # that the counts don't grow, so we try to
+        # correct for the uncounted objects
+        # This is just an estimate.
+        CORRECTION = 20
 
         enabled = gc.isenabled()
         gc.enable()
         olds = []
-        for i in range(1000):
-            newhead = make_ll(200)
-            count += 200
+        for i in range(20_000):
+            newhead = make_ll(20)
+            count += 20
             newhead.surprise = head
             olds.append(newhead)
-            if len(olds) == 50:
+            if len(olds) == 20:
                 stats = gc.get_stats()
                 young = stats[0]
                 incremental = stats[1]
                 old = stats[2]
                 collected = young['collected'] + incremental['collected'] + old['collected']
+                count += CORRECTION
                 live = count - collected
                 self.assertLess(live, 25000)
                 del olds[:]
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst
new file mode 100644
index 00000000000000..57ad9606b05e05
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst
@@ -0,0 +1,3 @@
+The cycle GC now chooses the size of increments based on the total heap
+size, instead of the rate of object creation. This ensures that it can keep
+up with growing heaps.
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 3320e54dd9fe93..8a1b483eddae35 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -326,7 +326,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
     }
 
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    return _PyGC_GetObjects(interp, generation);
+    return _PyGC_GetObjects(interp, (int)generation);
 }
 
 /*[clinic input]
diff --git a/Python/gc.c b/Python/gc.c
index d0f4ce38bbe567..2517b86a41fa53 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp)
     if (gcstate->callbacks == NULL) {
         return _PyStatus_NO_MEMORY();
     }
+    gcstate->heap_size = 0;
 
     return _PyStatus_OK();
 }
@@ -1232,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate,
                   struct gc_collection_stats *stats);
 
 static inline Py_ssize_t
-gc_list_set_space(PyGC_Head *list, uintptr_t space)
+gc_list_set_space(PyGC_Head *list, int space)
 {
     Py_ssize_t size = 0;
     PyGC_Head *gc;
@@ -1258,9 +1259,9 @@ gc_list_set_space(PyGC_Head *list, uintptr_t space)
  * N == 1.4 (1 + 4/threshold)
  */
 
-/* Multiply by 4 so that the default incremental threshold of 10
- * scans objects at 20% the rate of object creation */
-#define SCAN_RATE_MULTIPLIER 2
+/* Divide by 10, so that the default incremental threshold of 10
+ * scans objects at 1% of the heap size */
+#define SCAN_RATE_DIVISOR 10
 
 static void
 add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
@@ -1313,7 +1314,7 @@ gc_collect_young(PyThreadState *tstate,
     if (scale_factor < 1) {
         scale_factor = 1;
     }
-    gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
+    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     add_stats(gcstate, 0, stats);
 }
 
@@ -1384,12 +1385,12 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
 static void
 completed_cycle(GCState *gcstate)
 {
+#ifdef Py_DEBUG
     PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
     assert(gc_list_is_empty(not_visited));
+#endif
     gcstate->visited_space = flip_old_space(gcstate->visited_space);
-    if (gcstate->work_to_do > 0) {
-        gcstate->work_to_do = 0;
-    }
+    gcstate->work_to_do = 0;
 }
 
 static void
@@ -1404,13 +1405,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     if (scale_factor < 1) {
         scale_factor = 1;
     }
-    Py_ssize_t increment_size = 0;
     gc_list_merge(&gcstate->young.head, &increment);
     gcstate->young.count = 0;
     if (gcstate->visited_space) {
         /* objects in visited space have bit set, so we set it here */
         gc_list_set_space(&increment, 1);
     }
+    Py_ssize_t increment_size = 0;
     while (increment_size < gcstate->work_to_do) {
         if (gc_list_is_empty(not_visited)) {
             break;
@@ -1425,14 +1426,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     PyGC_Head survivors;
     gc_list_init(&survivors);
     gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
-    Py_ssize_t survivor_count = gc_list_size(&survivors);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
-    gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
+    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     gcstate->work_to_do -= increment_size;
-    if (gcstate->work_to_do < 0) {
-        gcstate->work_to_do = 0;
-    }
+
     validate_old(gcstate);
     add_stats(gcstate, 1, stats);
     if (gc_list_is_empty(not_visited)) {
@@ -1678,7 +1676,7 @@ _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
 }
 
 PyObject *
-_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
+_PyGC_GetObjects(PyInterpreterState *interp, int generation)
 {
     assert(generation >= -1 && generation < NUM_GENERATIONS);
     GCState *gcstate = &interp->gc;
@@ -1974,6 +1972,7 @@ _PyObject_GC_Link(PyObject *op)
     gc->_gc_next = 0;
     gc->_gc_prev = 0;
     gcstate->young.count++; /* number of allocated GC objects */
+    gcstate->heap_size++;
     if (gcstate->young.count > gcstate->young.threshold &&
         gcstate->enabled &&
         gcstate->young.threshold &&
@@ -2095,6 +2094,7 @@ PyObject_GC_Del(void *op)
     if (gcstate->young.count > 0) {
         gcstate->young.count--;
     }
+    gcstate->heap_size--;
     PyObject_Free(((char *)op)-presize);
 }
 
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 52c79c02099b53..69ce22a1e83b62 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -1305,7 +1305,7 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
 }
 
 PyObject *
-_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
+_PyGC_GetObjects(PyInterpreterState *interp, int generation)
 {
     PyObject *result = PyList_New(0);
     if (!result) {

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to