https://github.com/python/cpython/commit/4ed11d3cd288e6b90196a15c5a825a45d318fe47
commit: 4ed11d3cd288e6b90196a15c5a825a45d318fe47
branch: 3.12
author: Seth Michael Larson <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-01-25T17:05:57Z
summary:

[3.12] gh-143925: Reject control characters in data: URL mediatypes (#144113)

* [3.12] gh-143925: Reject control characters in data: URL mediatypes
(cherry picked from commit f25509e78e8be6ea73c811ac2b8c928c28841b9f)
(cherry picked from commit 2c9c746077d8119b5bcf5142316992e464594946)

Co-authored-by: Seth Michael Larson <[email protected]>

* Remove "ExtraAssertions"

files:
A Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
M Lib/test/test_urllib.py
M Lib/urllib/request.py

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 9becb72c741511..0d46a455664bad 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -12,6 +12,7 @@
 from test.support import os_helper
 from test.support import socket_helper
 from test.support import warnings_helper
+from test.support import control_characters_c0
 import os
 try:
     import ssl
@@ -688,6 +689,13 @@ def test_invalid_base64_data(self):
         # missing padding character
         self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
 
+    def test_invalid_mediatype(self):
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html;{c0},data')
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html{c0};base64,ZGF0YQ==')
 
 class urlretrieve_FileTests(unittest.TestCase):
     """Test urllib.urlretrieve() on local files"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index c7ded0f67fc67e..16449d6ff71939 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1655,6 +1655,11 @@ def data_open(self, req):
         scheme, data = url.split(":",1)
         mediatype, data = data.split(",",1)
 
+        # Disallow control characters within mediatype.
+        if re.search(r"[\x00-\x1F\x7F]", mediatype):
+            raise ValueError(
+                "Control characters not allowed in data: mediatype")
+
         # even base64 encoded data URLs might be quoted so unquote in any case:
         data = unquote_to_bytes(data)
         if mediatype.endswith(";base64"):
diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
new file mode 100644
index 00000000000000..46109dfbef3ee7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
@@ -0,0 +1 @@
+Reject control characters in ``data:`` URL media types.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]

Reply via email to