https://github.com/python/cpython/commit/46a54ea5b0e4658db6bc40706ac3d4d86734e599
commit: 46a54ea5b0e4658db6bc40706ac3d4d86734e599
branch: 3.15
author: Miss Islington (bot) <[email protected]>
committer: picnixz <[email protected]>
date: 2026-05-10T14:11:57Z
summary:

[3.15] gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (GH-148904) (#149639)

gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (GH-148904)
(cherry picked from commit bc1be4f6174086b4a46e3fe656552f5bb4e6e7b2)

Co-authored-by: ByteFlow <[email protected]>
Co-authored-by: Bénédikt Tran <[email protected]>

files:
A Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst
M Lib/test/test_pyexpat.py
M Modules/pyexpat.c

diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index aaa91aca36e3c4..9a1620029c6da9 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -712,6 +712,20 @@ def test_change_size_2(self):
         parser.Parse(xml2, True)
         self.assertEqual(self.n, 4)
 
+    @support.requires_resource('cpu')
+    @support.requires_resource('walltime')
+    @support.bigmemtest(size=2**31, memuse=4, dry_run=False)
+    def test_large_character_data_does_not_crash(self):
+        # See https://github.com/python/cpython/issues/148441
+        parser = expat.ParserCreate()
+        parser.buffer_text = True
+        parser.buffer_size = 2**31 - 1  # INT_MAX
+        N = 2049 * (1 << 20) - 3  # Character data greater than INT_MAX
+        self.assertGreater(N, parser.buffer_size)
+        parser.CharacterDataHandler = lambda text: None
+        xml_data = b"<r>" + b"A" * N + b"</r>"
+        self.assertEqual(parser.Parse(xml_data, True), 1)
+
 class ElementDeclHandlerTest(unittest.TestCase):
     def test_trigger_leak(self):
         # Unfixed, this test would leak the memory of the so-called
diff --git a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst
new file mode 100644
index 00000000000000..762815270e4d40
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst
@@ -0,0 +1,4 @@
+:mod:`xml.parsers.expat`: prevent a crash in
+:meth:`~xml.parsers.expat.xmlparser.CharacterDataHandler`
+when the character data size exceeds the parser's
+:attr:`buffer size <xml.parsers.expat.xmlparser.buffer_size>`.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 0f0afe17513ef1..c01f7babe74527 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -393,7 +393,7 @@ my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
     if (self->buffer == NULL)
         call_character_handler(self, data, len);
     else {
-        if ((self->buffer_used + len) > self->buffer_size) {
+        if (len > (self->buffer_size - self->buffer_used)) {
             if (flush_character_buffer(self) < 0)
                 return;
             /* handler might have changed; drop the rest on the floor

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to