https://github.com/python/cpython/commit/6ea04da27036eaa69d65150148bb8c537d9beacf
commit: 6ea04da27036eaa69d65150148bb8c537d9beacf
branch: main
author: Stephen Morton <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-01-07T12:40:41+02:00
summary:

gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284)

* Allow DOMParser.parse() to correctly handle DOMInputSource instances
  that only have a systemId attribute set.
* Fix DOMEntityResolver.resolveEntity(), which was broken by the
  Python 3.0 transition.
* Add Lib/test/test_xml_dom_xmlbuilder.py with few tests.

files:
A Lib/test/test_xml_dom_xmlbuilder.py
A Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst
A Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst
M Lib/xml/dom/xmlbuilder.py

diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py
new file mode 100644
index 00000000000000..5f5f2eb328df9f
--- /dev/null
+++ b/Lib/test/test_xml_dom_xmlbuilder.py
@@ -0,0 +1,88 @@
+import io
+import unittest
+from http import client
+from test.test_httplib import FakeSocket
+from unittest import mock
+from xml.dom import getDOMImplementation, minidom, xmlbuilder
+
+SMALL_SAMPLE = b"""<?xml version="1.0"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
+<!-- A comment -->
+<title>Introduction to XSL</title>
+<hr/>
+<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
+</html>"""
+
+
+class XMLBuilderTest(unittest.TestCase):
+    def test_entity_resolver(self):
+        body = (
+            b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+            + SMALL_SAMPLE
+        )
+
+        sock = FakeSocket(body)
+        response = client.HTTPResponse(sock)
+        response.begin()
+        attrs = {"open.return_value": response}
+        opener = mock.Mock(**attrs)
+
+        resolver = xmlbuilder.DOMEntityResolver()
+
+        with mock.patch("urllib.request.build_opener") as mock_build:
+            mock_build.return_value = opener
+            source = resolver.resolveEntity(None, "http://example.com/2000/svg")
+
+        self.assertIsInstance(source, xmlbuilder.DOMInputSource)
+        self.assertIsNone(source.publicId)
+        self.assertEqual(source.systemId, "http://example.com/2000/svg")
+        self.assertEqual(source.baseURI, "http://example.com/2000/")
+        self.assertEqual(source.encoding, "utf-8")
+        self.assertIs(source.byteStream, response)
+
+        self.assertIsNone(source.characterStream)
+        self.assertIsNone(source.stringData)
+
+    def test_builder(self):
+        imp = getDOMImplementation()
+        self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS)
+
+        builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+        self.assertIsInstance(builder, xmlbuilder.DOMBuilder)
+
+    def test_parse_uri(self):
+        body = (
+            b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+            + SMALL_SAMPLE
+        )
+
+        sock = FakeSocket(body)
+        response = client.HTTPResponse(sock)
+        response.begin()
+        attrs = {"open.return_value": response}
+        opener = mock.Mock(**attrs)
+
+        with mock.patch("urllib.request.build_opener") as mock_build:
+            mock_build.return_value = opener
+
+            imp = getDOMImplementation()
+            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+            document = builder.parseURI("http://example.com/2000/svg")
+
+        self.assertIsInstance(document, minidom.Document)
+        self.assertEqual(len(document.childNodes), 1)
+
+    def test_parse_with_systemId(self):
+        response = io.BytesIO(SMALL_SAMPLE)
+
+        with mock.patch("urllib.request.urlopen") as mock_open:
+            mock_open.return_value = response
+
+            imp = getDOMImplementation()
+            source = imp.createDOMInputSource()
+            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+            source.systemId = "http://example.com/2000/svg"
+            document = builder.parse(source)
+
+        self.assertIsInstance(document, minidom.Document)
+        self.assertEqual(len(document.childNodes), 1)
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
index 8a200263497b89..a8852625a2f9a2 100644
--- a/Lib/xml/dom/xmlbuilder.py
+++ b/Lib/xml/dom/xmlbuilder.py
@@ -189,7 +189,7 @@ def parse(self, input):
         options.filter = self.filter
         options.errorHandler = self.errorHandler
         fp = input.byteStream
-        if fp is None and options.systemId:
+        if fp is None and input.systemId:
             import urllib.request
             fp = urllib.request.urlopen(input.systemId)
         return self._parse_bytestream(fp, options)
@@ -247,10 +247,12 @@ def _create_opener(self):
 
     def _guess_media_encoding(self, source):
         info = source.byteStream.info()
-        if "Content-Type" in info:
-            for param in info.getplist():
-                if param.startswith("charset="):
-                    return param.split("=", 1)[1].lower()
+        # import email.message
+        # assert isinstance(info, email.message.Message)
+        charset = info.get_param('charset')
+        if charset is not None:
+            return charset.lower()
+        return None
 
 
 class DOMInputSource(object):
diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst
new file mode 100644
index 00000000000000..56e2fe6f85f4bf
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst
@@ -0,0 +1,3 @@
+Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle
+:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a
+:attr:`!systemId` attribute set.
diff --git a/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst
new file mode 100644
index 00000000000000..98c07297b06f8a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst
@@ -0,0 +1,2 @@
+Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was
+broken by the Python 3.0 transition.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to