https://github.com/python/cpython/commit/1c7011d8feb8fa9a68775784c9039e1d57ce6569
commit: 1c7011d8feb8fa9a68775784c9039e1d57ce6569
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-30T00:23:32+03:00
summary:
gh-150560: Fix crash in XML parser on invalid XML with multi-byte encoding (GH-150568)
files:
M Lib/test/test_pyexpat.py
M Lib/test/test_xml_etree.py
M Modules/pyexpat.c
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 3f2c5f7021018d..060a509c1bd1c7 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -426,6 +426,16 @@ def test_unknown_encoding(self):
with self.assertRaises(LookupError):
parser.Parse(data, True)
+ @support.subTests('sample,exception', [
+ (b'<x> \xa1</x>', UnicodeDecodeError), # crashed
+ (b'<x> \xa1</x', UnicodeDecodeError), # crashed
+ (b'<x> \xa1', expat.ExpatError),
+ ])
+ def test_multibyte_encoding_errors(self, sample, exception):
+ parser = expat.ParserCreate()
+ data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+ with self.assertRaises(exception):
+ parser.Parse(data, True)
class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 89aff568a1b4ef..acec4ec2ca257c 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1064,6 +1064,17 @@ def bxml(encoding, body=''):
self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
+ @support.subTests('sample,exception', [
+ (b'<x> \xa1</x>', UnicodeDecodeError), # crashed
+ (b'<x> \xa1</x', UnicodeDecodeError), # crashed
+ (b'<x> \xa1', None), # ET.ParseError
+ ])
+ def test_multibyte_encoding_errors(self, sample, exception):
+ exception = exception or ET.ParseError
+ data = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
+ with self.assertRaises(exception):
+ ET.XML(data)
+
def test_methods(self):
# Test serialization methods.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index aef6ebad9ce578..53d42ad50e37b9 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping)
static int
pyexpat_encoding_convert(void *data, const char *s)
{
+ if (PyErr_Occurred()) {
+ return -1;
+ }
pyexpat_encoding_info *info = (pyexpat_encoding_info *)data;
int i = (unsigned char)s[0];
assert(info->map[i] < -1);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]