https://github.com/python/cpython/commit/a5c7a7441870e045eb7589d1a1ff93d9423dea03
commit: a5c7a7441870e045eb7589d1a1ff93d9423dea03
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-06T14:40:10Z
summary:
gh-139489: Add xml.is_valid_text() (GH-149412)
files:
A Misc/NEWS.d/next/Library/2026-05-05-13-12-58.gh-issue-139489.a8qqIM.rst
M Doc/library/xml.rst
M Doc/whatsnew/3.15.rst
M Lib/test/test_xml.py
M Lib/xml/utils.py
diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst
index f9ffaa9a94aacc..98be50e15ff463 100644
--- a/Doc/library/xml.rst
+++ b/Doc/library/xml.rst
@@ -54,7 +54,19 @@ This module also defines utility functions.
"!", "?", and "=" are forbidden.
The name cannot start with a digit or a character like "-", ".", and "·".
- ..versionadded:: next
+ .. versionadded:: next
+
+
+.. function:: is_valid_text(data)
+
+ Return ``True`` if the string is a sequence of legal XML 1.0 characters,
+ ``False`` otherwise.
+
+ Almost all characters are permitted in XML 1.0 documents, except C0 control
+ characters (excluding TAB, CR and LF), surrogate characters and special
+ Unicode characters U+FFFE and U+FFFF.
+
+ .. versionadded:: next
.. _xml-security:
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 6007d772f8e2d7..698a9f88e1ee39 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1714,6 +1714,10 @@ xml
whether a string can be used as an element or attribute name in XML.
(Contributed by Serhiy Storchaka in :gh:`139489`.)
+* Add the :func:`xml.is_valid_text` function, which allows checking
+ whether a string can be used in an XML document.
+ (Contributed by Serhiy Storchaka in :gh:`139489`.)
+
xml.parsers.expat
-----------------
diff --git a/Lib/test/test_xml.py b/Lib/test/test_xml.py
index fd3633e43982d7..3a8b92048166f2 100644
--- a/Lib/test/test_xml.py
+++ b/Lib/test/test_xml.py
@@ -22,6 +22,22 @@ def test_is_valid_name(self):
for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000':
self.assertFalse(is_valid_name('name' + c))
+ def test_is_valid_text(self):
+ is_valid_text = xml.is_valid_text
+ self.assertTrue(is_valid_text(''))
+ self.assertTrue(is_valid_text('!0Aa_~ \r\n\t\x85\xa0'))
+ self.assertTrue(is_valid_text('\ud7ff\ue000\ufffd\U00010000\U0010ffff'))
+ self.assertFalse(is_valid_text('\x00'))
+ self.assertFalse(is_valid_text('\x01'))
+ self.assertFalse(is_valid_text('\x1f'))
+ self.assertTrue(is_valid_text('\x7f'))
+ self.assertTrue(is_valid_text('\x80'))
+ self.assertTrue(is_valid_text('\x9f'))
+ self.assertFalse(is_valid_text('\ud800'))
+ self.assertFalse(is_valid_text('\udfff'))
+ self.assertFalse(is_valid_text('\ufffe'))
+ self.assertFalse(is_valid_text('\uffff'))
+
if __name__ == '__main__':
unittest.main()
diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py
index c9a0b260675bed..532aa224dae677 100644
--- a/Lib/xml/utils.py
+++ b/Lib/xml/utils.py
@@ -23,3 +23,15 @@ def is_valid_name(name):
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
']*+',
name) is not None
+
+# https://www.w3.org/TR/xml/#charsets
+_ILLEGAL_XML_CHAR = (
+ '['
+ '\x00-\x08\x0B\x0C\x0E-\x1F' # C0 controls except TAB, CR and LF
+ '\uD800-\uDFFF' # the surrogate blocks
+ '\uFFFE\uFFFF' # special Unicode characters
+ ']')
+
+def is_valid_text(data):
+ """Test whether a string is a sequence of legal XML 1.0 characters."""
+ return _re.search(_ILLEGAL_XML_CHAR, data) is None
diff --git a/Misc/NEWS.d/next/Library/2026-05-05-13-12-58.gh-issue-139489.a8qqIM.rst b/Misc/NEWS.d/next/Library/2026-05-05-13-12-58.gh-issue-139489.a8qqIM.rst
new file mode 100644
index 00000000000000..c76879d3025bb6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-05-13-12-58.gh-issue-139489.a8qqIM.rst
@@ -0,0 +1,2 @@
+Add the :func:`xml.is_valid_text` function, which allows checking whether
+a string can be used in an XML document.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]