https://github.com/python/cpython/commit/1c7011d8feb8fa9a68775784c9039e1d57ce6569
commit: 1c7011d8feb8fa9a68775784c9039e1d57ce6569
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-30T00:23:32+03:00
summary:

gh-150560: Fix crash in XML parser on invalid XML with multi-byte encoding (GH-150568)

files:
M Lib/test/test_pyexpat.py
M Lib/test/test_xml_etree.py
M Modules/pyexpat.c

diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 3f2c5f7021018d..060a509c1bd1c7 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -426,6 +426,16 @@ def test_unknown_encoding(self):
         with self.assertRaises(LookupError):
             parser.Parse(data, True)
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', expat.ExpatError),
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        parser = expat.ParserCreate()
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            parser.Parse(data, True)
 
 class NamespaceSeparatorTest(unittest.TestCase):
     def test_legal(self):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 89aff568a1b4ef..acec4ec2ca257c 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1064,6 +1064,17 @@ def bxml(encoding, body=''):
         self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
         self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
 
+    @support.subTests('sample,exception', [
+        (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
+        (b'<x> \xa1', None), # ET.ParseError
+    ])
+    def test_multibyte_encoding_errors(self, sample, exception):
+        exception = exception or ET.ParseError
+        data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+        with self.assertRaises(exception):
+            ET.XML(data)
+
     def test_methods(self):
         # Test serialization methods.
 
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index aef6ebad9ce578..53d42ad50e37b9 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping)
 static int
 pyexpat_encoding_convert(void *data, const char *s)
 {
+    if (PyErr_Occurred()) {
+        return -1;
+    }
     pyexpat_encoding_info *info = (pyexpat_encoding_info *)data;
     int i = (unsigned char)s[0];
     assert(info->map[i] < -1);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to