https://github.com/python/cpython/commit/5755d0f083949ff3c5bf3a37e673e24e306b036e
commit: 5755d0f083949ff3c5bf3a37e673e24e306b036e
branch: main
author: Stan Ulbrych <[email protected]>
committer: emmatyping <[email protected]>
date: 2026-06-07T08:19:05-07:00
summary:
gh-150599: Prevent bz2 decompressor reuse after errors (#150600)
files:
A Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
M Lib/test/test_bz2.py
M Modules/_bz2module.c
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
index d8e3b671ec229f..64293d757331d7 100644
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -1032,6 +1032,21 @@ def test_failure(self):
# Previously, a second call could crash due to internal inconsistency
self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
+ def test_decompress_after_data_error(self):
+ data = bytes.fromhex(
+ "425a6839314159265359000000000000007fffff000000000000000000000000"
+ "00000000000000000000000000000000000000e0370000000000000000000000"
+ "000000000000000000000000000000000000000000000000000083f3"
+ )
+ bzd = BZ2Decompressor()
+ with self.assertRaisesRegex(OSError, "Invalid data stream"):
+ bzd.decompress(data)
+ # Previously, a second call could crash due to internal inconsistency
+ self.assertFalse(bzd.needs_input)
+ self.assertFalse(bzd.eof)
+ with self.assertRaisesRegex(ValueError, "previous error"):
+ bzd.decompress(b'\x00' * 18)
+
@support.refcount_test
def test_refleaks_in___init__(self):
gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
new file mode 100644
index 00000000000000..a37d86cf423f82
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
@@ -0,0 +1,3 @@
+Fix a possible stack buffer overflow in :mod:`bz2` when a
+:class:`bz2.BZ2Decompressor` is reused after a decompression error.
+The decompressor now becomes unusable after libbz2 reports an error.
diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c
index 4cf8beed9ee3eb..9db3ac39da5209 100644
--- a/Modules/_bz2module.c
+++ b/Modules/_bz2module.c
@@ -108,6 +108,7 @@ typedef struct {
typedef struct {
PyObject_HEAD
bz_stream bzs;
+ int bzerror;
char eof; /* Py_T_BOOL expects a char */
PyObject *unused_data;
char needs_input;
@@ -435,8 +436,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
d->bzs_avail_in_real += bzs->avail_in;
- if (catch_bz2_error(bzret))
+ if (catch_bz2_error(bzret)) {
+ d->bzerror = bzret;
+ FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
goto error;
+ }
if (bzret == BZ_STREAM_END) {
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
break;
@@ -607,10 +611,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
PyObject *result = NULL;
PyMutex_Lock(&self->mutex);
- if (self->eof)
+ if (self->eof) {
PyErr_SetString(PyExc_EOFError, "End of stream already reached");
- else
+ }
+ else if (self->bzerror) {
+ // Re-entering BZ2_bzDecompress() after an error can write out of bounds.
+ PyErr_SetString(PyExc_ValueError,
+ "Decompressor is unusable after a previous error");
+ }
+ else {
result = decompress(self, data->buf, data->len, max_length);
+ }
PyMutex_Unlock(&self->mutex);
return result;
}
@@ -638,6 +649,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type)
}
self->mutex = (PyMutex){0};
+ self->bzerror = 0;
self->needs_input = 1;
self->bzs_avail_in_real = 0;
self->input_buffer = NULL;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]