https://github.com/python/cpython/commit/c084a665689f914cb11f2b6e50cf8b11dccbae1f
commit: c084a665689f914cb11f2b6e50cf8b11dccbae1f
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: bitdancer <[email protected]>
date: 2025-12-21T14:36:23-05:00
summary:

[3.13] gh-79986: Add parsing for References/In-Reply-To email headers (GH-137201) (#142574)

gh-79986: Add parsing for References/In-Reply-To email headers (GH-137201)

This is a followup to 46d88a113142b26c01c95c93846a89318ba87ffc (GH-13397),
which added parsing for Message-ID. Similar handling is needed for the
other two identification headers.
(cherry picked from commit 79aa43a9797de64a3c42794f34329eab638dd67a)

Co-authored-by: elenril <[email protected]>

files:
A Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst
M Lib/email/_header_value_parser.py
M Lib/email/headerregistry.py
M Lib/test/test_email/test__header_value_parser.py
M Lib/test/test_email/test_headerregistry.py

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 41401f8f8d54da..68c2cf9585c5b4 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -874,6 +874,12 @@ class MessageID(MsgID):
 class InvalidMessageID(MessageID):
     token_type = 'invalid-message-id'
 
+class MessageIDList(TokenList):
+    token_type = 'message-id-list'
+
+    @property
+    def message_ids(self):
+        return [x for x in self if x.token_type=='msg-id']
 
 class Header(TokenList):
     token_type = 'header'
@@ -2171,6 +2177,32 @@ def parse_message_id(value):
 
     return message_id
 
+def parse_message_ids(value):
+    """in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
+       references      =   "References:" 1*msg-id CRLF
+    """
+    message_id_list = MessageIDList()
+    while value:
+        if value[0] == ',':
+            # message id list separated with commas - this is invalid,
+            # but happens rather frequently in the wild
+            message_id_list.defects.append(
+                errors.InvalidHeaderDefect("comma in msg-id list"))
+            message_id_list.append(
+                WhiteSpaceTerminal(' ', 'invalid-comma-replacement'))
+            value = value[1:]
+            continue
+        try:
+            token, value = get_msg_id(value)
+            message_id_list.append(token)
+        except errors.HeaderParseError as ex:
+            token = get_unstructured(value)
+            message_id_list.append(InvalidMessageID(token))
+            message_id_list.defects.append(
+                errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+            break
+    return message_id_list
+
 #
 # XXX: As I begin to add additional header parsers, I'm realizing we probably
 # have two level of parser routines: the get_XXX methods that get a token in
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
index 543141dc427ebe..0e8698efc0b966 100644
--- a/Lib/email/headerregistry.py
+++ b/Lib/email/headerregistry.py
@@ -534,6 +534,18 @@ def parse(cls, value, kwds):
         kwds['defects'].extend(parse_tree.all_defects)
 
 
+class ReferencesHeader:
+
+    max_count = 1
+    value_parser = staticmethod(parser.parse_message_ids)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+
+
 # The header factory #
 
 _default_header_map = {
@@ -557,6 +569,8 @@ def parse(cls, value, kwds):
     'content-disposition':          ContentDispositionHeader,
     'content-transfer-encoding':    ContentTransferEncodingHeader,
     'message-id':                   MessageIDHeader,
+    'in-reply-to':                  ReferencesHeader,
+    'references':                   ReferencesHeader,
     }
 
 class HeaderRegistry:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 561ded77334fc9..95764f69b06b1b 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2867,6 +2867,81 @@ def test_get_msg_id_ws_only_local(self):
         )
         self.assertEqual(msg_id.token_type, 'msg-id')
 
+    def test_parse_message_ids_valid(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<foo@bar> <bar@foo>",
+            "<foo@bar> <bar@foo>",
+            "<foo@bar> <bar@foo>",
+            [],
+            )
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_empty(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            " ",
+            " ",
+            " ",
+            [errors.InvalidHeaderDefect],
+            )
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_comment(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<foo@bar> (foo's message from \"bar\")",
+            "<foo@bar> (foo's message from \"bar\")",
+            "<foo@bar> ",
+            [],
+            )
+        self.assertEqual(message_ids.message_ids[0].value, '<foo@bar> ')
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_no_sep(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<foo@bar><bar@foo>",
+            "<foo@bar><bar@foo>",
+            "<foo@bar><bar@foo>",
+            [],
+            )
+        self.assertEqual(message_ids.message_ids[0].value, '<foo@bar>')
+        self.assertEqual(message_ids.message_ids[1].value, '<bar@foo>')
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_comma_sep(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<foo@bar>,<bar@foo>",
+            "<foo@bar> <bar@foo>",
+            "<foo@bar> <bar@foo>",
+            [errors.InvalidHeaderDefect],
+            )
+        self.assertEqual(message_ids.message_ids[0].value, '<foo@bar>')
+        self.assertEqual(message_ids.message_ids[1].value, '<bar@foo>')
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_invalid_id(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<Date: Wed, 08 Jun 2002 09:78:58 +0600>",
+            "<Date: Wed, 08 Jun 2002 09:78:58 +0600>",
+            "<Date: Wed, 08 Jun 2002 09:78:58 +0600>",
+            [errors.InvalidHeaderDefect]*2,
+            )
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
+    def test_parse_message_ids_broken_ang(self):
+        message_ids = self._test_parse_x(
+            parser.parse_message_ids,
+            "<foo@bar> >bar@foo",
+            "<foo@bar> >bar@foo",
+            "<foo@bar> >bar@foo",
+            [errors.InvalidHeaderDefect]*1,
+            )
+        self.assertEqual(message_ids.token_type, 'message-id-list')
+
 
 
 @parameterize
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index a579f9eeb7f4bf..ff300f6e133130 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1813,5 +1813,18 @@ def test_message_id_header_is_not_folded(self):
             h.fold(policy=policy.default.clone(max_line_length=20)),
             'Message-ID:\n <ईमेलfromMessage@wők.com>\n')
 
+    def test_fold_references(self):
+        h = self.make_header(
+            'References',
+            '<[email protected]> '
+            '<[email protected]>'
+            )
+        self.assertEqual(
+            h.fold(policy=policy.default.clone(max_line_length=20)),
+            'References: '
+            '<[email protected]>\n'
+            ' <[email protected]>\n')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst
new file mode 100644
index 00000000000000..327bbf869bce09
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst
@@ -0,0 +1,3 @@
+Add parsing for ``References`` and ``In-Reply-To`` headers to the :mod:`email`
+library that parses the header content as lists of message id tokens.  This
+prevents them from being folded incorrectly.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to