https://github.com/python/cpython/commit/e87baa8d24f08a1b71a1c9ceb4fbf26824d48239 commit: e87baa8d24f08a1b71a1c9ceb4fbf26824d48239 branch: 3.13 author: Shamil <[email protected]> committer: gpshead <[email protected]> date: 2026-05-19T20:33:22-07:00 summary:
[3.13] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) (#150079) gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. (cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047) Co-authored-by: Kumar Aditya <[email protected]> Co-authored-by: Gregory P. Smith <[email protected]> files: A Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst M Lib/test/test_json/test_speedups.py M Modules/_json.c diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 682014cfd5b344..7c2789f8d1f235 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -1,4 +1,5 @@ from test.test_json import CTest +from test.support import gc_collect class BadBool: @@ -80,3 +81,63 @@ def test(name): def test_unsortable_keys(self): with self.assertRaises(TypeError): self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'}) + + def test_mutate_dict_items_during_encode(self): + # gh-142831: Clearing the items list via a re-entrant key encoder + # must not cause a use-after-free. BadDict.items() returns a + # mutable list; encode_str clears it while iterating. + items = None + + class BadDict(dict): + def items(self): + nonlocal items + items = [("boom", object())] + return items + + cleared = False + def encode_str(obj): + nonlocal items, cleared + if items is not None: + items.clear() + items = None + cleared = True + gc_collect() + return '"x"' + + encoder = self.json.encoder.c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(BadDict(real=1), 0) + self.assertTrue(cleared) + + def test_mutate_list_during_encode(self): + # gh-142831: Clearing a list mid-iteration via the default + # callback must not cause a use-after-free. + call_count = 0 + lst = [object() for _ in range(10)] + + def default(obj): + nonlocal call_count + call_count += 1 + if call_count == 3: + lst.clear() + gc_collect() + return None + + encoder = self.json.encoder.c_make_encoder( + None, default, + self.json.encoder.c_encode_basestring, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(lst, 0) + # Verify the mutation path was actually hit and the loop + # stopped iterating after the list was cleared. + self.assertEqual(call_count, 3) diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 00000000000000..5fa3cd2727a9e5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index afefc71bfbdd9a..25cbd9b9bb89b4 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1602,9 +1602,13 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the items list, invalidating this borrowed ref. + Py_INCREF(item); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_DECREF(item); goto bail; } @@ -1612,18 +1616,30 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_DECREF(item); goto bail; + } + Py_DECREF(item); } Py_CLEAR(items); } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the dict, invalidating these borrowed refs. + Py_INCREF(key); + Py_INCREF(value); if (encoder_encode_key_value(s, writer, &first, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_DECREF(key); + Py_DECREF(value); goto bail; + } + Py_DECREF(key); + Py_DECREF(value); } } @@ -1712,12 +1728,20 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + // gh-142831: encoder_listencode_obj() can invoke user code + // that mutates the sequence, invalidating this borrowed ref. + Py_INCREF(obj); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) { + Py_DECREF(obj); goto bail; + } } - if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) + if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { + Py_DECREF(obj); goto bail; + } + Py_DECREF(obj); } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
