We recently started stripping comments and folding white space from the In-Reply-To and References headers. Do so also for the Message-ID header.
Signed-off-by: Stephen Finucane <step...@that.guru> Related: #399 --- patchwork/parser.py | 43 ++++++++++++++++++++++++++-------- patchwork/tests/test_parser.py | 32 +++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 10 deletions(-) diff --git patchwork/parser.py patchwork/parser.py index 17cc2325..f219f466 100644 --- patchwork/parser.py +++ patchwork/parser.py @@ -236,15 +236,14 @@ def _find_series_by_references(project, mail): name, prefixes = clean_subject(subject, [project.linkname]) version = parse_version(name, prefixes) - refs = find_references(mail) - h = clean_header(mail.get('Message-Id')) - if h: - refs = [h] + refs + msg_id = find_message_id(mail) + refs = [msg_id] + find_references(mail) for ref in refs: try: series = SeriesReference.objects.get( - msgid=ref[:255], project=project).series + msgid=ref[:255], project=project, + ).series if series.version != version: # if the versions don't match, at least make sure these were @@ -473,6 +472,34 @@ def find_headers(mail): return '\n'.join(strings) +def find_message_id(mail): + """Extract the 'message-id' headers from a given mail and validate it. + + The validation here is simply checking that the Message-ID is correctly + formatted per RFC-2822. However, even if it's not we'll attempt to use what + we're given because a patch tracked in Patchwork with janky threading is + better than no patch whatsoever. + """ + header = clean_header(mail.get('Message-Id')) + if not header: + raise ValueError("Broken 'Message-Id' header") + + msgid = _msgid_re.search(header) + if msgid: + msgid = msgid.group(0) + else: + # This is only info level since the admin likely can't do anything + # about this + logger.info( + "Malformed 'Message-Id' header. The 'msg-id' component should be " + "surrounded by angle brackets. Saving raw header. This may " + "include comments and extra comments." + ) + msgid = header + + return msgid[:255] + + def find_references(mail): """Construct a list of possible reply message ids. @@ -1062,11 +1089,7 @@ def parse_mail(mail, list_id=None): # parse metadata - msgid = clean_header(mail.get('Message-Id')) - if not msgid: - raise ValueError("Broken 'Message-Id' header") - msgid = msgid[:255] - + msgid = find_message_id(mail) subject = mail.get('Subject') name, prefixes = clean_subject(subject, [project.linkname]) is_comment = subject_check(subject) diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py index f65ad4b1..980a8afb 100644 --- patchwork/tests/test_parser.py +++ patchwork/tests/test_parser.py @@ -1265,6 +1265,38 @@ class DuplicateMailTest(TestCase): self.assertEqual(Cover.objects.count(), 1) +class TestFindMessageID(TestCase): + + def test_find_message_id__missing_header(self): + email = create_email('test') + del email['Message-Id'] + email['Message-Id'] = '' + + with self.assertRaises(ValueError) as cm: + parser.find_message_id(email) + self.assertIn("Broken 'Message-Id' header", str(cm.exeception)) + + def test_find_message_id__header_with_comments(self): + """Test that we strip comments from the Message-ID field.""" + message_id = '<xnzgy1de8d....@rhel8.vm> (message ID with a comment)' + email = create_email('test', msgid=message_id) + + expected = '<xnzgy1de8d....@rhel8.vm>' + actual = parser.find_message_id(email) + + self.assertEqual(expected, actual) + + def test_find_message_id__invalid_header_fallback(self): + """Test that we accept badly formatted Message-ID fields.""" + message_id = '5899d592-8c87-47d9-92b6-d34260ce1...@radware.com>' + email = create_email('test', msgid=message_id) + + expected = '5899d592-8c87-47d9-92b6-d34260ce1...@radware.com>' + actual = parser.find_message_id(email) + + self.assertEqual(expected, actual) + + class TestFindReferences(TestCase): def test_find_references__header_with_comments(self): -- 2.35.1 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork