https://github.com/python/cpython/commit/c42e6d3f1a066186f74d3971df4c512bc11c7997
commit: c42e6d3f1a066186f74d3971df4c512bc11c7997
branch: 3.14
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-30T13:52:00+03:00
summary:
[3.14] gh-149489: Fix ElementTree serialization to HTML (GH-149490) (GH-150596)
* The content of elements "xmp", "iframe", "noembed", "noframes",
and "plaintext" is no longer escaped.
* The "plaintext" element no longer has a closing tag.
(cherry picked from commit bcd29e466f55d8b4e3849ed6ada8ce86a46f5072)
files:
A Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementTree.py
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index eb5ca80b1aaca15..8c693bfbdb39d92 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1247,7 +1247,12 @@ def check(p, expected, namespaces=None):
{'': 'http://www.w3.org/2001/XMLSchema',
'ns': 'http://www.w3.org/2001/XMLSchema'})
- def test_processinginstruction(self):
+ def test_comment_serialization(self):
+ comm = ET.Comment('<spam> & ham')
+ # comments are not escaped
+ self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
+
+ def test_processinginstruction_serialization(self):
# Test ProcessingInstruction directly
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test',
'instruction')),
@@ -1256,13 +1261,22 @@ def test_processinginstruction(self):
b'<?test instruction?>')
# Issue #2746
-
+ # processing instructions are not escaped
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
b'<?test <testing&>?>')
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'),
'latin-1'),
b"<?xml version='1.0' encoding='latin-1'?>\n"
b"<?test <testing&>\xe3?>")
+ @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
+ def test_html_cdata_elems_serialization(self, tag):
+ # content of raw text elements is not escaped in html
+ tag = tag.title()
+ elem = ET.Element(tag)
+ elem.text = '<spam>&ham'
+ self.assertEqual(ET.tostring(elem, method='html'),
+ ('<%s><spam>&ham</%s>' % (tag, tag)).encode())
+
def test_html_empty_elems_serialization(self):
# issue 15970
# from http://www.w3.org/TR/html401/index/elements.html
@@ -1277,6 +1291,14 @@ def test_html_empty_elems_serialization(self):
method='html')
self.assertEqual(serialized, expected)
+ def test_html_plaintext_serialization(self):
+ # content of plaintext is not escaped in html
+ # no end tag for plaintext
+ elem = ET.Element('PlainText')
+ elem.text = '<spam>&ham'
+ self.assertEqual(ET.tostring(elem, method='html'),
+ b'<PlainText><spam>&ham')
+
def test_dump_attribute_order(self):
# See BPO 34160
e = ET.Element('cirriculum', status='public', company='example')
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 5d8b22ffb62c0dd..0a4203d372ce991 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -917,9 +917,12 @@ def _serialize_xml(write, elem, qnames, namespaces,
if elem.tail:
write(_escape_cdata(elem.tail))
+_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
+ "noframes", "plaintext"}
+
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param", "source",
- "track", "wbr"}
+ "track", "wbr", "plaintext"}
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
tag = elem.tag
@@ -960,7 +963,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
write(">")
ltag = tag.lower()
if text:
- if ltag == "script" or ltag == "style":
+ if ltag in _CDATA_CONTENT_ELEMENTS:
write(text)
else:
write(_escape_cdata(text))
diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
new file mode 100644
index 000000000000000..4f47d36fe2c8369
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst
@@ -0,0 +1,3 @@
+Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
+elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer
+escaped. The "plaintext" element no longer has a closing tag.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]