https://github.com/python/cpython/commit/350c58ba4ee13019b0cde70b49bfeadc63f4ceb8 commit: 350c58ba4ee13019b0cde70b49bfeadc63f4ceb8 branch: main author: Neil Schemenauer <nas-git...@arctrix.com> committer: nascheme <nas-git...@arctrix.com> date: 2025-08-07T16:32:17-07:00 summary:
GH-135552: Make the GC clear weakrefs later (GH-136189) Fix a bug caused by the garbage collector clearing weakrefs too early. The weakrefs in the ``tp_subclasses`` dictionary are needed in order to correctly invalidate type caches (for example, by calling ``PyType_Modified()``). Clearing weakrefs before calling finalizers causes the caches to not be correctly invalidated. That can cause crashes since the caches can refer to invalid objects. Defer the clearing of weakrefs without callbacks until after finalizers are executed. files: A Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst M InternalDocs/garbage_collector.md M Lib/test/test_finalization.py M Lib/test/test_gc.py M Lib/test/test_io.py M Modules/_datetimemodule.c M Modules/gc_weakref.txt M Python/gc.c M Python/gc_free_threading.c diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 9c35684c945b3e..a7d872f3ec4392 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -329,15 +329,16 @@ Once the GC knows the list of unreachable objects, a very delicate process start with the objective of completely destroying these objects. Roughly, the process follows these steps in order: -1. Handle and clear weak references (if any). Weak references to unreachable objects - are set to `None`. If the weak reference has an associated callback, the callback - is enqueued to be called once the clearing of weak references is finished. We only - invoke callbacks for weak references that are themselves reachable. If both the weak - reference and the pointed-to object are unreachable we do not execute the callback. - This is partly for historical reasons: the callback could resurrect an unreachable - object and support for weak references predates support for object resurrection. - Ignoring the weak reference's callback is fine because both the object and the weakref - are going away, so it's legitimate to say the weak reference is going away first. +1. Handle weak references with callbacks (if any). If the weak reference has + an associated callback, the callback is enqueued to be called after the weak + reference is cleared. We only invoke callbacks for weak references that + are themselves reachable. If both the weak reference and the pointed-to + object are unreachable we do not execute the callback. This is partly for + historical reasons: the callback could resurrect an unreachable object + and support for weak references predates support for object resurrection. + Ignoring the weak reference's callback is fine because both the object and + the weakref are going away, so it's legitimate to say the weak reference is + going away first. 2. If an object has legacy finalizers (`tp_del` slot) move it to the `gc.garbage` list. 3. Call the finalizers (`tp_finalize` slot) and mark the objects as already @@ -346,7 +347,12 @@ follows these steps in order: 4. Deal with resurrected objects. If some objects have been resurrected, the GC finds the new subset of objects that are still unreachable by running the cycle detection algorithm again and continues with them. -5. Call the `tp_clear` slot of every object so all internal links are broken and +5. Clear any weak references that still refer to unreachable objects. The + `wr_object` attribute for these weakrefs are set to `None`. Note that some + of these weak references maybe have been newly created during the running of + finalizers in step 3. Also, clear any weak references that are part of the + unreachable set. +6. Call the `tp_clear` slot of every object so all internal links are broken and the reference counts fall to 0, triggering the destruction of all unreachable objects. diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 42871f8a09b16b..9dd68cf8d57413 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -174,7 +174,7 @@ def test_simple(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) @@ -188,12 +188,12 @@ def test_simple_resurrect(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - self.assertIsNot(wr(), None) + self.assertIsNotNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) @support.cpython_only def test_non_gc(self): @@ -265,7 +265,7 @@ def test_simple(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) @@ -276,19 +276,24 @@ def test_simple_resurrect(self): s = SelfCycleResurrector() ids = [id(s)] wr = weakref.ref(s) + wrc = weakref.ref(s, lambda x: None) del s gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - # XXX is this desirable? - self.assertIs(wr(), None) + # This used to be None because weakrefs were cleared before + # calling finalizers. Now they are cleared after. + self.assertIsNotNone(wr()) + # A weakref with a callback is still cleared before calling + # finalizers. + self.assertIsNone(wrc()) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) def test_simple_suicide(self): # Test the GC is able to deal with an object that kills its last @@ -301,11 +306,11 @@ def test_simple_suicide(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) gc.collect() self.assert_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) class ChainedBase: @@ -378,18 +383,27 @@ def check_non_resurrecting_chain(self, classes): def check_resurrecting_chain(self, classes): N = len(classes) + def dummy_callback(ref): + pass with SimpleBase.test(): nodes = self.build_chain(classes) N = len(nodes) ids = [id(s) for s in nodes] survivor_ids = [id(s) for s in nodes if isinstance(s, SimpleResurrector)] wrs = [weakref.ref(s) for s in nodes] + wrcs = [weakref.ref(s, dummy_callback) for s in nodes] del nodes gc.collect() self.assert_del_calls(ids) self.assert_survivors(survivor_ids) - # XXX desirable? - self.assertEqual([wr() for wr in wrs], [None] * N) + for wr in wrs: + # These values used to be None because weakrefs were cleared + # before calling finalizers. Now they are cleared after. + self.assertIsNotNone(wr()) + for wr in wrcs: + # Weakrefs with callbacks are still cleared before calling + # finalizers. + self.assertIsNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) @@ -491,7 +505,7 @@ def test_legacy(self): self.assert_del_calls(ids) self.assert_tp_del_calls(ids) self.assert_survivors([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) gc.collect() self.assert_del_calls(ids) self.assert_tp_del_calls(ids) @@ -507,13 +521,13 @@ def test_legacy_resurrect(self): self.assert_tp_del_calls(ids) self.assert_survivors(ids) # weakrefs are cleared before tp_del is called. - self.assertIs(wr(), None) + self.assertIsNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) self.assert_tp_del_calls(ids * 2) self.assert_survivors(ids) - self.assertIs(wr(), None) + self.assertIsNone(wr()) def test_legacy_self_cycle(self): # Self-cycles with legacy finalizers end up in gc.garbage. @@ -527,11 +541,11 @@ def test_legacy_self_cycle(self): self.assert_tp_del_calls([]) self.assert_survivors([]) self.assert_garbage(ids) - self.assertIsNot(wr(), None) + self.assertIsNotNone(wr()) # Break the cycle to allow collection gc.garbage[0].ref = None self.assert_garbage([]) - self.assertIs(wr(), None) + self.assertIsNone(wr()) if __name__ == "__main__": diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 96ebb23f73de9d..3ec211531c4c70 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -309,6 +309,26 @@ def func(): self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + def test_datetime_weakref_cycle(self): + # https://github.com/python/cpython/issues/132413 + # If the weakref used by the datetime extension gets cleared by the GC (due to being + # in an unreachable cycle) then datetime functions would crash (get_module_state() + # was returning a NULL pointer). This bug is fixed by clearing weakrefs without + # callbacks *after* running finalizers. + code = """if 1: + import _datetime + class C: + def __del__(self): + print('__del__ called') + _datetime.timedelta(days=1) # crash? + + l = [C()] + l.append(l) + """ + rc, stdout, stderr = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(stdout.strip(), b'__del__ called') + @refcount_test def test_frame(self): def f(): @@ -658,9 +678,8 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # The weakref should be cleared before executing the callback. + self.assertIsNone(x) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -1335,6 +1354,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820c(self): # Corresponds to temp2c.py in the bug report. This is pretty # elaborate. @@ -1410,6 +1430,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index b487bcabf01ca4..92be2763e5ed1e 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -808,7 +808,12 @@ def test_closefd_attr(self): def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to disk. - with warnings_helper.check_warnings(('', ResourceWarning)): + # + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) f = self.FileIO(os_helper.TESTFN, "wb") f.write(b"abcxxx") f.f = f @@ -1809,7 +1814,11 @@ def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.f = f @@ -2158,7 +2167,11 @@ def test_garbage_collection(self): # all data to disk. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.write(b"123xxx") @@ -4080,7 +4093,8 @@ def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. # The Python version has __del__, so it ends in gc.garbage instead. - with warnings_helper.check_warnings(('', ResourceWarning)): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "wb") b = self.BufferedWriter(rawio) t = self.TextIOWrapper(b, encoding="ascii") diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst new file mode 100644 index 00000000000000..ea30a43fc25d41 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst @@ -0,0 +1,7 @@ +Fix a bug caused by the garbage collector clearing weakrefs too early. The +weakrefs in the ``tp_subclasses`` dictionary are needed in order to correctly +invalidate type caches (for example, by calling ``PyType_Modified()``). +Clearing weakrefs before calling finalizers causes the caches to not be +correctly invalidated. That can cause crashes since the caches can refer to +invalid objects. Defer the clearing of weakrefs without callbacks until after +finalizers are executed. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 01039dfeec0719..3f7afbe27d7358 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -214,7 +214,7 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { goto error; } - if (ref != NULL) { + if (ref != NULL && ref != Py_None) { PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); /* We only need "current" for pointer comparison. */ diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index f53fb99dd6cdcb..c3b8cc743ccd21 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,6 +1,16 @@ Intro ===== +************************************************************************** +Note: this document was written long ago, before PEP 442 (safe object +finalization) was implemented. While that has changed some things, this +document is still mostly accurate. Just note that the rules being discussed +here apply to the unreachable set of objects *after* non-legacy finalizers +have been called. Also, the clearing of weakrefs has been changed to happen +later in the collection (after running finalizers but before tp_clear is +called). +************************************************************************** + The basic rule for dealing with weakref callbacks (and __del__ methods too, for that matter) during cyclic gc: diff --git a/Python/gc.c b/Python/gc.c index 807bd5b65b2332..1050eae60fd337 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,60 +858,65 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. +/* Handle weakref callbacks. Note that it's possible for such weakrefs to be + * outside the unreachable set -- indeed, those are precisely the weakrefs + * whose callbacks must be invoked. See gc_weakref.txt for overview & some + * details. + * + * The clearing of weakrefs is suble and must be done carefully, as there was + * previous bugs related to this. First, weakrefs to the unreachable set of + * objects must be cleared before we start calling `tp_clear`. If we don't, + * those weakrefs can reveal unreachable objects to Python-level code and that + * is not safe. Many objects are not usable after `tp_clear` has been called + * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as + * example bugs). + * + * Weakrefs with callbacks always need to be cleared before executing the + * callback. That's because sometimes a callback will call the ref object, + * to check if the reference is actually dead (KeyedRef does this, for + * example). We want to indicate that it is dead, even though it is possible + * a finalizer might resurrect it. Clearing also prevents the callback from + * executing more than once. + * + * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before* + * calling finalizers. However, since tp_subclasses started being necessary + * to invalidate caches (e.g. by PyType_Modified), that clearing has created + * a bug. If the weakref to the subclass is cleared before a finalizer is + * called, the cache may not be correctly invalidated. That can lead to + * segfaults since the caches can refer to deallocated objects (GH-91636 + * is an example). Now, we delay the clear of weakrefs without callbacks + * until *after* finalizers have been executed. That means weakrefs without + * callbacks are still usable while finalizers are being executed. + * + * The weakrefs that are inside the unreachable set must also be cleared. + * The reason this is required is not immediately obvious. If the weakref + * refers to an object outside of the unreachable set, that object might go + * away when we start clearing objects. Normally, the object should also be + * part of the unreachable set but that's not true in the case of incomplete + * or missing `tp_traverse` methods. When that object goes away, the callback + * for weakref can be executed and that could reveal unreachable objects to + * Python-level code. See bpo-38006 as an example bug. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) +handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) { PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ PyGC_Head *next; int num_freed = 0; - if (allow_callbacks) { - gc_list_init(&wrcb_to_call); - } + gc_list_init(&wrcb_to_call); - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. + /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the + * callback needs to be invoked. We make another pass over wrcb_to_call, + * invoking callbacks, after this pass completes. */ for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); next = GC_NEXT(gc); - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -923,30 +928,29 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) */ wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. + /* `op` may have some weakrefs. March over the list and move the + * weakrefs with callbacks that must be called into wrcb_to_call. */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ - - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - if (!allow_callbacks) { - continue; - } + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; if (wr->wr_callback == NULL) { /* no callback */ continue; } + // Clear the weakref. See the comments above this function for + // reasons why we need to clear weakrefs that have callbacks. + // Note that _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + /* Headache time. `op` is going away, and is weakly referenced by * `wr`, which has a callback. Should the callback be invoked? If wr * is also trash, no: @@ -962,10 +966,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) * outside the current generation, CT may be reachable from the * callback. Then the callback could resurrect insane objects. * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. + * Since the callback is never needed and may be unsafe in this + * case, wr is simply left in the unreachable set. Note that + * clear_weakrefs() will ensure its callback will not trigger + * inside delete_garbage(). * * OTOH, if wr isn't part of CT, we should invoke the callback: the * weakref outlived the trash. Note that since wr isn't CT in this @@ -976,8 +980,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) * is moved to wrcb_to_call in this case. */ if (gc_is_collecting(AS_GC((PyObject *)wr))) { - /* it should already have been cleared above */ - _PyObject_ASSERT((PyObject*)wr, wr->wr_object == Py_None); continue; } @@ -987,17 +989,13 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) Py_INCREF(wr); /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC((PyObject *)wr); + PyGC_Head *wrasgc = AS_GC((PyObject *)wr); // wrasgc is reachable, but next isn't, so they can't be the same _PyObject_ASSERT((PyObject *)wr, wrasgc != next); gc_list_move(wrasgc, &wrcb_to_call); } } - if (!allow_callbacks) { - return 0; - } - /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ @@ -1007,9 +1005,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) PyObject *callback; gc = (PyGC_Head*)wrcb_to_call._gc_next; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; + PyWeakReference *wr = (PyWeakReference *)op; callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); @@ -1048,6 +1046,56 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. See the comments above + * handle_weakref_callbacks() for why these weakrefs need to be cleared. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set is always cleared. See + * the comments above handle_weakref_callbacks() for why these + * must be cleared. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1739,8 +1787,8 @@ gc_collect_region(PyThreadState *tstate, } } - /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to, true); + /* Invoke weakref callbacks as necessary. */ + stats->collected += handle_weakref_callbacks(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1754,13 +1802,10 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); - /* Clear weakrefs to objects in the unreachable set. No Python-level - * code must be allowed to access those unreachable objects. During - * delete_garbage(), finalizers outside the unreachable set might run - * and create new weakrefs. If those weakrefs were not cleared, they - * could reveal unreachable objects. Callbacks are not executed. + /* Clear weakrefs to objects in the unreachable set. See the comments + * above handle_weakref_callbacks() for details. */ - handle_weakrefs(&final_unreachable, NULL, false); + clear_weakrefs(&final_unreachable); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 0b0ddf227e4952..842aa3401548c9 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1492,22 +1492,49 @@ move_legacy_finalizer_reachable(struct collection_state *state) return 0; } -// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// optionally enqueued in `wrcb_to_call`, but not invoked yet. +// Weakrefs with callbacks are enqueued in `wrcb_to_call`, but not invoked +// yet. Note that it's possible for such weakrefs to be outside the +// unreachable set -- indeed, those are precisely the weakrefs whose callbacks +// must be invoked. See gc_weakref.txt for overview & some details. +// +// The clearing of weakrefs is suble and must be done carefully, as there was +// previous bugs related to this. First, weakrefs to the unreachable set of +// objects must be cleared before we start calling `tp_clear`. If we don't, +// those weakrefs can reveal unreachable objects to Python-level code and that +// is not safe. Many objects are not usable after `tp_clear` has been called +// and could even cause crashes if accessed (see bpo-38006 and gh-91636 as +// example bugs). +// +// Weakrefs with callbacks always need to be cleared before executing the +// callback. That's because sometimes a callback will call the ref object, +// to check if the reference is actually dead (KeyedRef does this, for +// example). We want to indicate that it is dead, even though it is possible +// a finalizer might resurrect it. Clearing also prevents the callback from +// executing more than once. +// +// Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before* +// calling finalizers. However, since tp_subclasses started being necessary +// to invalidate caches (e.g. by PyType_Modified), that clearing has created +// a bug. If the weakref to the subclass is cleared before a finalizer is +// called, the cache may not be correctly invalidated. That can lead to +// segfaults since the caches can refer to deallocated objects (GH-91636 +// is an example). Now, we delay the clear of weakrefs without callbacks +// until *after* finalizers have been executed. That means weakrefs without +// callbacks are still usable while finalizers are being executed. +// +// The weakrefs that are inside the unreachable set must also be cleared. +// The reason this is required is not immediately obvious. If the weakref +// refers to an object outside of the unreachable set, that object might go +// away when we start clearing objects. Normally, the object should also be +// part of the unreachable set but that's not true in the case of incomplete +// or missing `tp_traverse` methods. When that object goes away, the callback +// for weakref can be executed and that could reveal unreachable objects to +// Python-level code. See bpo-38006 as an example bug. static void -clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) +find_weakref_callbacks(struct collection_state *state) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { - if (PyWeakref_Check(op)) { - // Clear weakrefs that are themselves unreachable to ensure their - // callbacks will not be executed later from a `tp_clear()` - // inside delete_garbage(). That would be unsafe: it could - // resurrect a dead object or access a an already cleared object. - // See bpo-38006 for one example. - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -1519,16 +1546,21 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) // `op` may have some weakrefs. March over the list, clear // all the weakrefs, and enqueue the weakrefs with callbacks // that must be called into wrcb_to_call. - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - // _PyWeakref_ClearRef clears the weakref but leaves - // the callback pointer intact. Obscure: it also - // changes *wrlist. - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - if (!enqueue_callbacks) { - continue; + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; + + // Weakrefs with callbacks always need to be cleared before + // executing the callback. + if (wr->wr_callback != NULL) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); } // We do not invoke callbacks for weakrefs that are themselves @@ -1550,6 +1582,39 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) } } +// Clear weakrefs to objects in the unreachable set. See comments +// above find_weakref_callbacks() for why this clearing is required. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void call_weakref_callbacks(struct collection_state *state) { @@ -2222,8 +2287,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Record the number of live GC objects interp->gc.long_lived_total = state->long_lived_total; - // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state, true); + // Find weakref callbacks we will honor (but do not call them). + find_weakref_callbacks(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2240,12 +2305,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); if (err == 0) { - // Clear weakrefs to objects in the unreachable set. No Python-level - // code must be allowed to access those unreachable objects. During - // delete_garbage(), finalizers outside the unreachable set might - // run and create new weakrefs. If those weakrefs were not cleared, - // they could reveal unreachable objects. - clear_weakrefs(state, false); + clear_weakrefs(state); } _PyEval_StartTheWorld(interp); _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: arch...@mail-archive.com