https://github.com/python/cpython/commit/bba6c1d9d5721e763df88af187bc0a531e1708b6 commit: bba6c1d9d5721e763df88af187bc0a531e1708b6 branch: 3.14 author: Shamil <[email protected]> committer: gpshead <[email protected]> date: 2026-05-19T20:31:43-07:00 summary:
[3.14] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) (#150078) gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. (cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047) Co-authored-by: Kumar Aditya <[email protected]> Co-authored-by: Gregory P. Smith <[email protected]> files: A Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst M Lib/test/test_json/test_speedups.py M Modules/_json.c diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 4c0aa5f993b30f..0b22a0bf4b9538 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -1,4 +1,5 @@ from test.test_json import CTest +from test.support import gc_collect class BadBool: @@ -111,3 +112,63 @@ def test_current_indent_level(self): self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}]) + + def test_mutate_dict_items_during_encode(self): + # gh-142831: Clearing the items list via a re-entrant key encoder + # must not cause a use-after-free. BadDict.items() returns a + # mutable list; encode_str clears it while iterating. + items = None + + class BadDict(dict): + def items(self): + nonlocal items + items = [("boom", object())] + return items + + cleared = False + def encode_str(obj): + nonlocal items, cleared + if items is not None: + items.clear() + items = None + cleared = True + gc_collect() + return '"x"' + + encoder = self.json.encoder.c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(BadDict(real=1), 0) + self.assertTrue(cleared) + + def test_mutate_list_during_encode(self): + # gh-142831: Clearing a list mid-iteration via the default + # callback must not cause a use-after-free. + call_count = 0 + lst = [object() for _ in range(10)] + + def default(obj): + nonlocal call_count + call_count += 1 + if call_count == 3: + lst.clear() + gc_collect() + return None + + encoder = self.json.encoder.c_make_encoder( + None, default, + self.json.encoder.c_encode_basestring, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(lst, 0) + # Verify the mutation path was actually hit and the loop + # stopped iterating after the list was cleared. + self.assertEqual(call_count, 3) diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 00000000000000..5fa3cd2727a9e5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index 39ec9a969cf04d..39cdb9fd4f40c8 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1702,9 +1702,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the items list, invalidating this borrowed ref. + Py_INCREF(item); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_DECREF(item); goto bail; } @@ -1712,18 +1716,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(item); goto bail; + } + Py_DECREF(item); } Py_CLEAR(items); } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the dict, invalidating these borrowed refs. + Py_INCREF(key); + Py_INCREF(value); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(key); + Py_DECREF(value); goto bail; + } + Py_DECREF(key); + Py_DECREF(value); } } @@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, } for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + // gh-142831: encoder_listencode_obj() can invoke user code + // that mutates the sequence, invalidating this borrowed ref. + Py_INCREF(obj); if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { + Py_DECREF(obj); goto bail; + } } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { _PyErr_FormatNote("when serializing %T item %zd", seq, i); + Py_DECREF(obj); goto bail; } + Py_DECREF(obj); } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
