https://github.com/python/cpython/commit/5577bd1298d5d9ec4305b5bc44fb0ef43a4035f1
commit: 5577bd1298d5d9ec4305b5bc44fb0ef43a4035f1
branch: 3.13
author: Stan Ulbrych <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-03-02T23:00:59Z
summary:

[3.13] gh-144872: fix heap buffer overflow `_PyTokenizer_ensure_utf8` (GH-144807) (#145441)

(cherry picked from commit 3fc945df22a169e039c3f21b44c0d08390a00c0c)

Co-authored-by: AdamKorcz <[email protected]>

files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
M Lib/test/test_source_encoding.py
M Parser/tokenizer/helpers.c

diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index c5280673ab8f23..607a8387a68d24 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -64,6 +64,23 @@ def test_issue7820(self):
         # two bytes in common with the UTF-8 BOM
         self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
 
+    def test_truncated_utf8_at_eof(self):
+        # Regression test for https://issues.oss-fuzz.com/issues/451112368
+        # Truncated multi-byte UTF-8 sequences at end of input caused an
+        # out-of-bounds read in Parser/tokenizer/helpers.c:valid_utf8().
+        truncated = [
+            b'\xc2',              # 2-byte lead, missing 1 continuation
+            b'\xdf',              # 2-byte lead, missing 1 continuation
+            b'\xe0',              # 3-byte lead, missing 2 continuations
+            b'\xe0\xa0',          # 3-byte lead, missing 1 continuation
+            b'\xf0\x90',          # 4-byte lead, missing 2 continuations
+            b'\xf0\x90\x80',      # 4-byte lead, missing 1 continuation
+            b'\xf3',              # 4-byte lead, missing 3 (the oss-fuzz reproducer)
+        ]
+        for seq in truncated:
+            with self.subTest(seq=seq):
+                self.assertRaises(SyntaxError, compile, seq, '<test>', 'exec')
+
     @requires_subprocess()
     def test_20731(self):
         sub = subprocess.Popen([sys.executable,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
new file mode 100644
index 00000000000000..c06bf01baee6fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
@@ -0,0 +1 @@
+Fix heap buffer overflow in the parser found by OSS-Fuzz.
diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c
index 9c9d05bbef0f1a..aa6da82319e1c2 100644
--- a/Parser/tokenizer/helpers.c
+++ b/Parser/tokenizer/helpers.c
@@ -481,9 +481,11 @@ valid_utf8(const unsigned char* s)
         return 0;
     }
     length = expected + 1;
-    for (; expected; expected--)
-        if (s[expected] < 0x80 || s[expected] >= 0xC0)
+    for (int i = 1; i <= expected; i++) {
+        if (s[i] < 0x80 || s[i] >= 0xC0) {
             return 0;
+        }
+    }
     return length;
 }
 
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]
