RFC2822 states that [1] a comment or folding white space is permitted to be inserted before or after a msg-id in in the Message-ID, In-Reply-To or References fields. Allow for this.
[1] https://tools.ietf.org/html/rfc2822#section-3.6.4 Signed-off-by: Stephen Finucane <step...@that.guru> Closes: #399 --- patchwork/parser.py | 13 ++- patchwork/tests/test_parser.py | 79 +++++++++++++++++-- .../notes/issue-399-09d6f17aa54b14b2.yaml | 11 +++ 3 files changed, 89 insertions(+), 14 deletions(-) create mode 100644 releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml diff --git patchwork/parser.py patchwork/parser.py index e6e1a7fb..17cc2325 100644 --- patchwork/parser.py +++ patchwork/parser.py @@ -31,6 +31,7 @@ from patchwork.models import SeriesReference from patchwork.models import State +_msgid_re = re.compile(r'<[^>]+>') _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list'] @@ -483,19 +484,15 @@ def find_references(mail): if 'In-Reply-To' in mail: for in_reply_to in mail.get_all('In-Reply-To'): - r = clean_header(in_reply_to) - if r: - refs.append(r) + ref = _msgid_re.search(clean_header(in_reply_to)) + if ref: + refs.append(ref.group(0)) if 'References' in mail: for references_header in mail.get_all('References'): - h = clean_header(references_header) - if not h: - continue - references = h.split() + references = _msgid_re.findall(clean_header(references_header)) references.reverse() for ref in references: - ref = ref.strip() if ref not in refs: refs.append(ref) diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py index 12f984bc..f65ad4b1 100644 --- patchwork/tests/test_parser.py +++ patchwork/tests/test_parser.py @@ -74,6 +74,7 @@ def _create_email( sender=None, listid=None, in_reply_to=None, + references=None, headers=None, ): msg['Message-Id'] = msgid or make_msgid() @@ -84,6 +85,9 @@ def _create_email( if in_reply_to: msg['In-Reply-To'] = in_reply_to + if references: + msg['References'] = references + for header in headers or {}: msg[header] = headers[header] @@ -97,12 +101,21 @@ def create_email( sender=None, listid=None, in_reply_to=None, + references=None, headers=None, ): msg = MIMEText(content, _charset='us-ascii') return _create_email( - msg, msgid, subject, sender, listid, in_reply_to, headers) + msg, + msgid, + subject, + sender, + listid, + in_reply_to, + references, + headers, + ) def parse_mail(*args, **kwargs): @@ -1216,7 +1229,7 @@ class DuplicateMailTest(TestCase): def test_duplicate_patch(self): diff = read_patch('0001-add-line.patch') - m = create_email(diff, listid=self.listid, msgid='1...@example.com') + m = create_email(diff, listid=self.listid, msgid='<1...@example.com>') self._test_duplicate_mail(m) @@ -1224,18 +1237,26 @@ class DuplicateMailTest(TestCase): def test_duplicate_comment(self): diff = read_patch('0001-add-line.patch') - m1 = create_email(diff, listid=self.listid, msgid='1...@example.com') + m1 = create_email( + diff, + listid=self.listid, + msgid='<1...@example.com>', + ) _parse_mail(m1) - m2 = create_email('test', listid=self.listid, msgid='2...@example.com', - in_reply_to='1...@example.com') + m2 = create_email( + 'test', + listid=self.listid, + msgid='<2...@example.com>', + in_reply_to='<1...@example.com>', + ) self._test_duplicate_mail(m2) self.assertEqual(Patch.objects.count(), 1) self.assertEqual(PatchComment.objects.count(), 1) def test_duplicate_coverletter(self): - m = create_email('test', listid=self.listid, msgid='1...@example.com') + m = create_email('test', listid=self.listid, msgid='<1...@example.com>') del m['Subject'] m['Subject'] = '[PATCH 0/1] test cover letter' @@ -1244,6 +1265,52 @@ class DuplicateMailTest(TestCase): self.assertEqual(Cover.objects.count(), 1) +class TestFindReferences(TestCase): + + def test_find_references__header_with_comments(self): + """Test that we strip comments from References, In-Reply-To fields.""" + in_reply_to = ( + '<4574b99b-edac-d8dc-9141-79c3109d2...@huawei.com> (message from\n' + ' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)' + ) + email = create_email('test', in_reply_to=in_reply_to) + + expected = ['<4574b99b-edac-d8dc-9141-79c3109d2...@huawei.com>'] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + def test_find_references__duplicate_references(self): + """Test that we ignore duplicate message IDs in 'References'.""" + message_id = '<20130510114450.7104c...@nehalam.linuxnetplumber.net>' + in_reply_to = \ + '<525534677.5312512.1368202896189.javamail.r...@vmware.com>' + references = ( + '<afcfceb8eb0e24448e4eb95988ba1e531fa2e...@xmb-aln-x05.cisco.com>\n' # noqa: E501 + ' <cae68auor7b5a2qvdujhh0kehpi+sr9x3qfrtumglxt1bk4v...@mail.gmail.com>\n' # noqa: E501 + ' <1676591087.5291867.1368201908283.javamail.r...@vmware.com>\n' + ' <20130510091549.3c064...@nehalam.linuxnetplumber.net>\n' + ' <525534677.5312512.1368202896189.javamail.r...@vmware.com>' + ) + email = create_email( + 'test', + msgid=message_id, + in_reply_to=in_reply_to, + references=references, + ) + + expected = [ + '<525534677.5312512.1368202896189.javamail.r...@vmware.com>', + '<20130510091549.3c064...@nehalam.linuxnetplumber.net>', + '<1676591087.5291867.1368201908283.javamail.r...@vmware.com>', + '<cae68auor7b5a2qvdujhh0kehpi+sr9x3qfrtumglxt1bk4v...@mail.gmail.com>', # noqa: E501 + '<afcfceb8eb0e24448e4eb95988ba1e531fa2e...@xmb-aln-x05.cisco.com>', + ] + actual = parser.find_references(email) + + self.assertEqual(expected, actual) + + class TestCommentCorrelation(TestCase): def test_find_patch_for_comment__no_reply(self): diff --git releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml new file mode 100644 index 00000000..e925a606 --- /dev/null +++ releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Comments and whitespace are now correctly stripped from the ``Message-ID``, + ``In-Reply-To``, and ``References`` headers. One side effect of this change + is that the parser is now stricter with regards to the format of the + ``msg-id`` component of these headers: all identifiers must now be + surrounded by angle brackets, e.g. ``<abc...@example.com>``. This is + mandated in the spec and a review of mailing lists archives suggest it is + broadly adhered to. Without these markers, there is no way to delimit + ``msg-id`` from any surrounding comments and whitespace. -- 2.35.1 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork