https://github.com/python/cpython/commit/b46df7fe42459124d242571a979f2cedfb1e0a91 commit: b46df7fe42459124d242571a979f2cedfb1e0a91 branch: 3.13 author: Miss Islington (bot) <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2026-05-30T11:16:32Z summary:
[3.13] gh-149489: Fix ElementTree serialization to HTML (GH-149490) (GH-150596) (GH-150609) * The content of elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer escaped. * The "plaintext" element no longer have the closing tag. (cherry picked from commit c42e6d3f1a066186f74d3971df4c512bc11c7997) (cherry picked from commit bcd29e466f55d8b4e3849ed6ada8ce86a46f5072) Co-authored-by: Serhiy Storchaka <[email protected]> files: A Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst M Lib/test/test_xml_etree.py M Lib/xml/etree/ElementTree.py diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 51f8a2d52161cc6..1a39de8b5aac3aa 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1248,7 +1248,12 @@ def check(p, expected, namespaces=None): {'': 'http://www.w3.org/2001/XMLSchema', 'ns': 'http://www.w3.org/2001/XMLSchema'}) - def test_processinginstruction(self): + def test_comment_serialization(self): + comm = ET.Comment('<spam> & ham') + # comments are not escaped + self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->') + + def test_processinginstruction_serialization(self): # Test ProcessingInstruction directly self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), @@ -1257,13 +1262,22 @@ def test_processinginstruction(self): b'<?test instruction?>') # Issue #2746 - + # processing instructions are not escaped self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), b'<?test <testing&>?>') self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'), b"<?xml version='1.0' encoding='latin-1'?>\n" b"<?test <testing&>\xe3?>") + @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes")) + def test_html_cdata_elems_serialization(self, tag): + # content of raw text elements is not escaped in html + tag = tag.title() + elem = ET.Element(tag) + elem.text = '<spam>&ham' + self.assertEqual(ET.tostring(elem, method='html'), + 
('<%s><spam>&ham</%s>' % (tag, tag)).encode()) + def test_html_empty_elems_serialization(self): # issue 15970 # from http://www.w3.org/TR/html401/index/elements.html @@ -1278,6 +1292,14 @@ def test_html_empty_elems_serialization(self): method='html') self.assertEqual(serialized, expected) + def test_html_plaintext_serialization(self): + # content of plaintext is not escaped in html + # no end tag for plaintext + elem = ET.Element('PlainText') + elem.text = '<spam>&ham' + self.assertEqual(ET.tostring(elem, method='html'), + b'<PlainText><spam>&ham') + def test_dump_attribute_order(self): # See BPO 34160 e = ET.Element('cirriculum', status='public', company='example') diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index e8e237bf7815fd1..ef162a967d8f889 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -913,9 +913,12 @@ def _serialize_xml(write, elem, qnames, namespaces, if elem.tail: write(_escape_cdata(elem.tail)) +_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed", + "noframes", "plaintext"} + HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr", "img", "input", "isindex", "link", "meta", "param", "source", - "track", "wbr"} + "track", "wbr", "plaintext"} def _serialize_html(write, elem, qnames, namespaces, **kwargs): tag = elem.tag @@ -956,7 +959,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): write(">") ltag = tag.lower() if text: - if ltag == "script" or ltag == "style": + if ltag in _CDATA_CONTENT_ELEMENTS: write(text) else: write(_escape_cdata(text)) diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst new file mode 100644 index 000000000000000..4f47d36fe2c8369 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst @@ -0,0 +1,3 @@ +Fix :mod:`~xml.etree.ElementTree` serialization to HTML. 
The content of +elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer +escaped. The "plaintext" element no longer has a closing tag. _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
