https://github.com/python/cpython/commit/db4b1948bc4ad5e7230fe1f2814da409ca1d669b commit: db4b1948bc4ad5e7230fe1f2814da409ca1d669b branch: main author: Cody Maloney <[email protected]> committer: vstinner <[email protected]> date: 2026-06-09T12:31:44+02:00 summary:
gh-143008: Fix Null pointer dereferences in TextIOWrapper underlying stream access (#145957) TextIOWrapper keeps its underlying stream in a member called `self->buffer`. That stream can be detached by user code, such as custom `.flush` implementations resulting in `self->buffer` being set to NULL. The implementation often checked at the start of functions if `self->buffer` is in a good state, but did not always recheck after other Python code was called which could modify `self->buffer`. The cases which need to be re-checked are hard to spot so rather than rely on reviewer effort create better safety by making all self->buffer access go through helper functions. Thank you yihong0618 for the test, NEWS and initial implementation in gh-143041. Co-authored-by: yihong0618 <[email protected]> Co-authored-by: Victor Stinner <[email protected]> files: A Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst A Misc/NEWS.d/next/Library/2026-06-04-18-22-56.gh-issue-143008.z5tw-J.rst M Lib/test/test_io/test_textio.py M Modules/_io/clinic/textio.c.h M Modules/_io/textio.c diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index d725f9212ceaaef..82096ab09873955 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -1560,6 +1560,56 @@ def closed(self): wrapper = self.TextIOWrapper(raw) wrapper.close() # should not crash + def test_reentrant_detach_during_flush(self): + # gh-143008: Reentrant detach() during flush should not crash. + + class DetachOnce(self.BufferedRandom): + wrapper = None + + def detach_once(self): + original = self.wrapper + self.wrapper = None + if original is not None: + original.detach() + original.flush() + + class DetachOnFlush(DetachOnce): + def flush(self): + self.detach_once() + + class DetachOnWrite(DetachOnce): + def write(self, b): + self.detach_once() + return len(b) + + # Separate reference for after detach_once. + wrapper = None + + def make_text(buffer): + nonlocal wrapper + buffer.wrapper = self.TextIOWrapper(buffer, encoding='utf-8') + wrapper = buffer.wrapper + + # Many calls could result in the same null self->buffer crash. + tests = [ + ('truncate', lambda: wrapper.truncate(0)), + ('close', lambda: wrapper.close()), + ('detach', lambda: wrapper.detach()), + ('seek', lambda: wrapper.seek(0)), + ('tell', lambda: wrapper.tell()), + ('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)), + ] + for name, method in tests: + with self.subTest(name): + make_text(DetachOnFlush(self.MockRawIO())) + self.assertRaisesRegex(ValueError, "detached", method) + + # Should not crash. + with self.subTest('read via writeflush'): + make_text(DetachOnWrite(self.MockRawIO())) + wrapper.write('x') + self.assertRaisesRegex(ValueError, "detached", wrapper.read) + class PyTextIOWrapperTest(TextIOWrapperTest, PyTestCase): shutdown_error = "LookupError: unknown encoding: ascii" diff --git a/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst b/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst new file mode 100644 index 000000000000000..907cdb770ab65f9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst @@ -0,0 +1,2 @@ +Fix crash in :class:`io.TextIOWrapper` when reentrant +:meth:`io.TextIOBase.detach` is called reentrantly from the underlying buffer. diff --git a/Misc/NEWS.d/next/Library/2026-06-04-18-22-56.gh-issue-143008.z5tw-J.rst b/Misc/NEWS.d/next/Library/2026-06-04-18-22-56.gh-issue-143008.z5tw-J.rst new file mode 100644 index 000000000000000..e99bc39c45f9b8f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-04-18-22-56.gh-issue-143008.z5tw-J.rst @@ -0,0 +1 @@ +Fix race conditions when re-initializing a :class:`io.TextIOWrapper` object. diff --git a/Modules/_io/clinic/textio.c.h b/Modules/_io/clinic/textio.c.h index 9407076b850cee9..8d59bda5f74b386 100644 --- a/Modules/_io/clinic/textio.c.h +++ b/Modules/_io/clinic/textio.c.h @@ -666,7 +666,9 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs) goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_TextIOWrapper___init___impl((textio *)self, buffer, encoding, errors, newline, line_buffering, write_through); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1329,4 +1331,4 @@ _io_TextIOWrapper__CHUNK_SIZE_set(PyObject *self, PyObject *value, void *Py_UNUS return return_value; } -/*[clinic end generated code: output=f900b42090c9781c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8c571c9dba87d2b1 input=a9049054013a1b77]*/ diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index e80b75066c59a61..24e08cec88f2a3a 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -672,6 +672,8 @@ struct textio int ok; /* initialized? */ int detached; Py_ssize_t chunk_size; + /* Use helpers buffer_*() functions to access buffer; many operations can set it to + NULL (see gh-143008, gh-142594). */ PyObject *buffer; PyObject *encoding; PyObject *encoder; @@ -729,6 +731,67 @@ struct textio #define textio_CAST(op) ((textio *)(op)) +/* Helpers to safely operate on self->buffer. + + self->buffer can be detached (set to NULL) by any user code that is called + leading to NULL pointer dereferences (see gh-143008, gh-142594). Protect + against that by using helpers to check self->buffer validity at callsites. */ +static PyObject * +buffer_access_safe(textio *self) +{ + /* Check self->buffer directly but match errors of CHECK_ATTACHED since this + is called during construction and finalization where self->ok == 0. */ + if (self->buffer == NULL) { + if (self->ok <= 0) { + PyErr_SetString(PyExc_ValueError, + "I/O operation on uninitialized object"); + } + else { + PyErr_SetString(PyExc_ValueError, + "underlying buffer has been detached"); + } + return NULL; + } + + /* Returning a borrowed reference is safe since TextIOWrapper methods are + protected by critical sections. */ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + return self->buffer; +} + +static PyObject * +buffer_getattr(textio *self, PyObject *attr_name) +{ + PyObject *buffer = buffer_access_safe(self); + if (buffer == NULL) { + return NULL; + } + + return PyObject_GetAttr(buffer, attr_name); +} + +static PyObject * +buffer_callmethod_noargs(textio *self, PyObject *name) +{ + PyObject *buffer = buffer_access_safe(self); + if (buffer == NULL) { + return NULL; + } + + return PyObject_CallMethodNoArgs(buffer, name); +} + +static PyObject * +buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg) +{ + PyObject *buffer = buffer_access_safe(self); + if (buffer == NULL) { + return NULL; + } + + return PyObject_CallMethodOneArg(buffer, name, arg); +} + static void textiowrapper_set_decoded_chars(textio *self, PyObject *chars); @@ -898,7 +961,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info, PyObject *res; int r; - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); + res = buffer_callmethod_noargs(self, &_Py_ID(readable)); if (res == NULL) return -1; @@ -954,7 +1017,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info, PyObject *res; int r; - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); + res = buffer_callmethod_noargs(self, &_Py_ID(writable)); if (res == NULL) return -1; @@ -1000,8 +1063,7 @@ _textiowrapper_fix_encoder_state(textio *self) self->encoding_start_of_stream = 1; - PyObject *cookieObj = PyObject_CallMethodNoArgs( - self->buffer, &_Py_ID(tell)); + PyObject *cookieObj = buffer_callmethod_noargs(self, &_Py_ID(tell)); if (cookieObj == NULL) { return -1; } @@ -1061,6 +1123,7 @@ io_check_errors(PyObject *errors) /*[clinic input] +@critical_section _io.TextIOWrapper.__init__ buffer: object encoding: str(accept={str, NoneType}) = None @@ -1104,7 +1167,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, const char *encoding, PyObject *errors, const char *newline, int line_buffering, int write_through) -/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/ +/*[clinic end generated code: output=72267c0c01032ed2 input=0f077220214c40a4]*/ { PyObject *raw, *codec_info = NULL; PyObject *res; @@ -1568,11 +1631,14 @@ _io_TextIOWrapper_detach_impl(textio *self) /*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/ { PyObject *buffer; - CHECK_ATTACHED(self); if (_PyFile_Flush((PyObject *)self) < 0) { return NULL; } - buffer = self->buffer; + /* _PyFile_Flush could detach before returning; raise an exception. */ + buffer = buffer_access_safe(self); + if (buffer == NULL) { + return NULL; + } self->buffer = NULL; self->detached = 1; return buffer; @@ -1641,7 +1707,7 @@ _textiowrapper_writeflush(textio *self) PyObject *ret; do { - ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b); + ret = buffer_callmethod_onearg(self, &_Py_ID(write), b); } while (ret == NULL && _PyIO_trap_eintr()); Py_DECREF(b); // NOTE: We cleared buffer but we don't know how many bytes are actually written @@ -1787,7 +1853,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) } if (needflush) { - if (_PyFile_Flush(self->buffer) < 0) { + PyObject *buffer = buffer_access_safe(self); + if (buffer == NULL || _PyFile_Flush(buffer) < 0) { return NULL; } } @@ -1917,9 +1984,10 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) if (chunk_size == NULL) goto fail; - input_chunk = PyObject_CallMethodOneArg(self->buffer, - (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), - chunk_size); + input_chunk = buffer_callmethod_onearg(self, + (self->has_read1 ? &_Py_ID(read1) : + &_Py_ID(read)), + chunk_size); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; @@ -2003,7 +2071,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (n < 0) { /* Read everything */ - PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read)); + PyObject *bytes = buffer_callmethod_noargs(self, &_Py_ID(read)); PyObject *decoded; if (bytes == NULL) goto fail; @@ -2600,7 +2668,11 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) Py_DECREF(res); } - res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2); + PyObject *buf = buffer_access_safe(self); + if (buf == NULL) { + goto fail; + } + res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2); Py_CLEAR(cookieObj); if (res == NULL) goto fail; @@ -2648,7 +2720,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) posobj = PyLong_FromOff_t(cookie.start_pos); if (posobj == NULL) goto fail; - res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj); + res = buffer_callmethod_onearg(self, &_Py_ID(seek), posobj); Py_DECREF(posobj); if (res == NULL) goto fail; @@ -2665,8 +2737,15 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) if (cookie.chars_to_skip) { /* Just like _read_chunk, feed the decoder and save a snapshot. */ - PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read), - "i", cookie.bytes_to_feed); + PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed); + if (bytes_to_feed == NULL) { + goto fail; + } + PyObject *input_chunk = buffer_callmethod_onearg(self, + &_Py_ID(read), + bytes_to_feed); + Py_DECREF(bytes_to_feed); + PyObject *decoded; if (input_chunk == NULL) @@ -2765,7 +2844,7 @@ _io_TextIOWrapper_tell_impl(textio *self) goto fail; } - posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell)); + posobj = buffer_callmethod_noargs(self, &_Py_ID(tell)); if (posobj == NULL) goto fail; @@ -2975,7 +3054,7 @@ _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) return NULL; } - return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos); + return buffer_callmethod_onearg(self, &_Py_ID(truncate), pos); } static PyObject * @@ -3057,8 +3136,7 @@ static PyObject * _io_TextIOWrapper_fileno_impl(textio *self) /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno)); + return buffer_callmethod_noargs(self, &_Py_ID(fileno)); } /*[clinic input] @@ -3070,8 +3148,7 @@ static PyObject * _io_TextIOWrapper_seekable_impl(textio *self) /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable)); + return buffer_callmethod_noargs(self, &_Py_ID(seekable)); } /*[clinic input] @@ -3083,8 +3160,7 @@ static PyObject * _io_TextIOWrapper_readable_impl(textio *self) /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); + return buffer_callmethod_noargs(self, &_Py_ID(readable)); } /*[clinic input] @@ -3096,8 +3172,7 @@ static PyObject * _io_TextIOWrapper_writable_impl(textio *self) /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); + return buffer_callmethod_noargs(self, &_Py_ID(writable)); } /*[clinic input] @@ -3109,8 +3184,7 @@ static PyObject * _io_TextIOWrapper_isatty_impl(textio *self) /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty)); + return buffer_callmethod_noargs(self, &_Py_ID(isatty)); } /*[clinic input] @@ -3127,7 +3201,7 @@ _io_TextIOWrapper_flush_impl(textio *self) self->telling = self->seekable; if (_textiowrapper_writeflush(self) < 0) return NULL; - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush)); + return buffer_callmethod_noargs(self, &_Py_ID(flush)); } /*[clinic input] @@ -3160,8 +3234,9 @@ _io_TextIOWrapper_close_impl(textio *self) else { PyObject *exc = NULL; if (self->finalizing) { - res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn), - (PyObject *)self); + res = buffer_callmethod_onearg(self, + &_Py_ID(_dealloc_warn), + (PyObject *)self); if (res) { Py_DECREF(res); } @@ -3173,7 +3248,7 @@ _io_TextIOWrapper_close_impl(textio *self) exc = PyErr_GetRaisedException(); } - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close)); + res = buffer_callmethod_noargs(self, &_Py_ID(close)); if (exc != NULL) { _PyErr_ChainExceptions1(exc); Py_CLEAR(res); @@ -3241,8 +3316,7 @@ static PyObject * _io_TextIOWrapper_name_get_impl(textio *self) /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/ { - CHECK_ATTACHED(self); - return PyObject_GetAttr(self->buffer, &_Py_ID(name)); + return buffer_getattr(self, &_Py_ID(name)); } /*[clinic input] @@ -3255,8 +3329,17 @@ static PyObject * _io_TextIOWrapper_closed_get_impl(textio *self) /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/ { - CHECK_ATTACHED(self); - return PyObject_GetAttr(self->buffer, &_Py_ID(closed)); + /* If partially constructed or deconstructed, return that the underlying + buffer is closed. + + The code managing the transition is responsible for closing. The closed + attribute is often called in re-initalization, as part of repr in error + cases, and when the I/O stack is garbage collected. */ + if (self->ok <= 0) { + Py_RETURN_TRUE; + } + + return buffer_getattr(self, &_Py_ID(closed)); } /*[clinic input] _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
