https://github.com/python/cpython/commit/c432d0147bdf1a66604e7a3d6a71660ae79b5f45
commit: c432d0147bdf1a66604e7a3d6a71660ae79b5f45
branch: main
author: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్  రెడ్డి) <thatiparthysreeni...@gmail.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-30T12:29:29Z
summary:

gh-127794: Validate email header names according to RFC 5322 (#127820)

`email.message.Message` objects now validate header names specified via
`__setitem__` or `add_header` according to RFC 5322, §2.2 [1].

In particular, callers should expect a ValueError to be raised for invalid
header names.

[1]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2

---------

Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
Co-authored-by: R. David Murray <rdmur...@bitdance.com>

files:
A Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst
M Lib/email/_policybase.py
M Lib/email/policy.py
M Lib/test/test_email/test_email.py
M Lib/test/test_email/test_message.py

diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
index 4b63b97217a835..95e79b8938bb4c 100644
--- a/Lib/email/_policybase.py
+++ b/Lib/email/_policybase.py
@@ -4,6 +4,7 @@
 """
 
 import abc
+import re
 from email import header
 from email import charset as _charset
 from email.utils import _has_surrogates
@@ -14,6 +15,14 @@
     'compat32',
     ]
 
+# validation regex from RFC 5322, equivalent to pattern re.compile("[!-9;-~]+$")
+valid_header_name_re = re.compile("[\041-\071\073-\176]+$")
+
+def validate_header_name(name):
+    # Validate header name according to RFC 5322
+    if not valid_header_name_re.match(name):
+        raise ValueError(
+            f"Header field name contains invalid characters: {name!r}")
 
 class _PolicyBase:
 
@@ -314,6 +323,7 @@ def header_store_parse(self, name, value):
         """+
         The name and value are returned unmodified.
         """
+        validate_header_name(name)
         return (name, value)
 
     def header_fetch_parse(self, name, value):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 6e109b65011a44..4169150101a29d 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -4,7 +4,13 @@
 
 import re
 import sys
-from email._policybase import Policy, Compat32, compat32, _extend_docstrings
+from email._policybase import (
+    Compat32,
+    Policy,
+    _extend_docstrings,
+    compat32,
+    validate_header_name
+)
 from email.utils import _has_surrogates
 from email.headerregistry import HeaderRegistry as HeaderRegistry
 from email.contentmanager import raw_data_manager
@@ -138,6 +144,7 @@ def header_store_parse(self, name, value):
         CR or LF characters.
 
         """
+        validate_header_name(name)
         if hasattr(value, 'name') and value.name.lower() == name.lower():
             return (name, value)
         if isinstance(value, str) and len(value.splitlines())>1:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 2deb35721576b8..724af3b787d38b 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -728,6 +728,31 @@ def test_nonascii_add_header_with_tspecial(self):
             "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
             msg['Content-Disposition'])
 
+    def test_invalid_header_names(self):
+        invalid_headers = [
+            ('Invalid Header', 'contains space'),
+            ('Tab\tHeader', 'contains tab'),
+            ('Colon:Header', 'contains colon'),
+            ('', 'Empty name'),
+            (' LeadingSpace', 'starts with space'),
+            ('TrailingSpace ', 'ends with space'),
+            ('Header\x7F', 'Non-ASCII character'),
+            ('Header\x80', 'Extended ASCII'),
+        ]
+        for policy in (email.policy.default, email.policy.compat32):
+            for setter in (Message.__setitem__, Message.add_header):
+                for name, value in invalid_headers:
+                    self.do_test_invalid_header_names(
+                        policy, setter,name, value)
+
+    def do_test_invalid_header_names(self, policy, setter, name, value):
+        with self.subTest(policy=policy, setter=setter, name=name, value=value):
+            message = Message(policy=policy)
+            pattern = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
+            with self.assertRaisesRegex(ValueError, pattern) as cm:
+                 setter(message, name, value)
+            self.assertIn(f"{name!r}", str(cm.exception))
+
     def test_binary_quopri_payload(self):
         for charset in ('latin-1', 'ascii'):
             msg = Message()
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index 96979db27f3a21..23c39775a8b2e5 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -1004,6 +1004,30 @@ def test_folding_with_long_nospace_http_policy_1(self):
         parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
         self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
 
+    def test_invalid_header_names(self):
+        invalid_headers = [
+            ('Invalid Header', 'contains space'),
+            ('Tab\tHeader', 'contains tab'),
+            ('Colon:Header', 'contains colon'),
+            ('', 'Empty name'),
+            (' LeadingSpace', 'starts with space'),
+            ('TrailingSpace ', 'ends with space'),
+            ('Header\x7F', 'Non-ASCII character'),
+            ('Header\x80', 'Extended ASCII'),
+        ]
+        for email_policy in (policy.default, policy.compat32):
+            for setter in (EmailMessage.__setitem__, EmailMessage.add_header):
+                for name, value in invalid_headers:
+                    self.do_test_invalid_header_names(email_policy, setter, name, value)
+
+    def do_test_invalid_header_names(self, policy, setter, name, value):
+        with self.subTest(policy=policy, setter=setter, name=name, value=value):
+            message = EmailMessage(policy=policy)
+            pattern = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
+            with self.assertRaisesRegex(ValueError, pattern) as cm:
+                 setter(message, name, value)
+            self.assertIn(f"{name!r}", str(cm.exception))
+
     def test_get_body_malformed(self):
         """test for bpo-42892"""
         msg = textwrap.dedent("""\
diff --git a/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst
new file mode 100644
index 00000000000000..b6e39d66d04221
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst
@@ -0,0 +1,4 @@
+When headers are added to :class:`email.message.Message` objects, either through
+:meth:`email.message.Message.__setitem__` or :meth:`email.message.Message.add_header`,
+the field name is now validated according to :rfc:`RFC 5322, Section 2.2 <5322#section-2.2>`
+and a :exc:`ValueError` is raised if the field name contains any invalid characters.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to