On 09/12/2016 04:53 PM, Stephen Finucane wrote: > It is now possible to parse and store series, so do just that. > The parsing at the moment is based on both RFC822 headers and > subject lines. > > Signed-off-by: Stephen Finucane <[email protected]>
thanks for the tests! Reviewed-by: Andy Doan <[email protected]> > --- > v3: > - Rework how nested series are handled once again > - Don't search for references when creating a cover letter > v2: > - Rebase onto master, moving changes into 'parser' > - Add unit tests > - Merge "Handle 'xxx (v2)' style messages" patch > - Merge "Handle series sent 'in-reply-to'" patch > - Handle capitalized version prefixes like [V2] > - Trivial cleanup of some parser functions > --- > patchwork/models.py | 7 ++- > patchwork/parser.py | 127 > +++++++++++++++++++++++++++++++++++++---- > patchwork/tests/test_parser.py | 111 +++++++++++++++++++++++++++++------ > patchwork/tests/utils.py | 32 +++++++++++ > 4 files changed, 247 insertions(+), 30 deletions(-) > > diff --git a/patchwork/models.py b/patchwork/models.py > index 2875369..52325d2 100644 > --- a/patchwork/models.py > +++ b/patchwork/models.py > @@ -230,8 +230,11 @@ class SeriesRevision(models.Model): > try: > return self.cover_letter.name > except CoverLetter.DoesNotExist: > - return '[Series #%d, revision #%d]' % (self.group.id, > - self.version) > + if self.group: > + return '[Series #%d, revision #%d]' % (self.group.id, > + self.version) > + else: > + return '[Untitled series]' > > @property > def actual_total(self): > diff --git a/patchwork/parser.py b/patchwork/parser.py > index 1805df8..d20ac19 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -21,8 +21,10 @@ > > import codecs > import datetime > -from email.header import Header, decode_header > -from email.utils import parsedate_tz, mktime_tz > +from email.header import Header > +from email.header import decode_header > +from email.utils import parsedate_tz > +from email.utils import mktime_tz > from fnmatch import fnmatch > from functools import reduce > import logging > @@ -33,9 +35,17 @@ from django.contrib.auth.models import User > from django.utils import six > from django.utils.six.moves import map > > -from patchwork.models import (Patch, Project, Person, Comment, State, > - DelegationRule, Submission, CoverLetter, > - get_default_initial_patch_state) > +from patchwork.models import Comment > +from patchwork.models import CoverLetter > +from patchwork.models import DelegationRule > +from patchwork.models import get_default_initial_patch_state > +from patchwork.models import Patch > +from patchwork.models import Person > +from patchwork.models import Project > +from patchwork.models import SeriesRevision > +from patchwork.models import SeriesReference > +from patchwork.models import State > +from patchwork.models import Submission > > > _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') > @@ -100,6 +110,34 @@ def find_project_by_header(mail): > return project > > > +def find_series(mail): > + """Find a patch's `SeriesRevision`. > + > + Traverse RFC822 headers, starting with most recent first, to find > + ancestors and the related series. Headers are traversed in reverse > + to handle series sent in reply to previous series, like so: > + > + [PATCH 0/3] A cover letter > + [PATCH 1/3] The first patch > + ... > + [PATCH v2 0/3] A cover letter > + [PATCH v2 1/3] The first patch > + ... > + > + Args: > + mail (email.message.Message): The mail to extract series from > + > + Returns: > + The matching `SeriesRevision` instance, if any > + """ > + for ref in find_references(mail, True): > + # try parsing by RFC5322 fields first > + try: > + return SeriesReference.objects.get(msgid=ref).series > + except SeriesReference.DoesNotExist: > + pass > + > + > def find_author(mail): > from_header = clean_header(mail.get('From')) > name, email = (None, None) > @@ -161,10 +199,13 @@ def find_headers(mail): > for (k, v) in list(mail.items())]) > > > -def find_references(mail): > +def find_references(mail, include_msgid=False): > """Construct a list of possible reply message ids.""" > refs = [] > > + if include_msgid: > + refs.append(mail.get('Message-ID')) > + > if 'In-Reply-To' in mail: > refs.append(mail.get('In-Reply-To')) > > @@ -178,6 +219,13 @@ def find_references(mail): > return refs > > > +def _parse_prefixes(subject_prefixes, regex): > + for prefix in subject_prefixes: > + m = regex.match(prefix) > + if m: > + return m > + > + > def parse_series_marker(subject_prefixes): > """Extract series markers from subject. > > @@ -193,14 +241,36 @@ def parse_series_marker(subject_prefixes): > """ > > regex = re.compile('^([0-9]+)/([0-9]+)$') > - for prefix in subject_prefixes: > - m = regex.match(prefix) > - if not m: > - continue > + m = _parse_prefixes(subject_prefixes, regex) > + if m: > return (int(m.group(1)), int(m.group(2))) > + > return (None, None) > > > +def parse_version(subject, subject_prefixes): > + """Extract patch version. > + > + Args: > + subject: Main body of subject line > + subject_prefixes: List of subject prefixes to extract version > + from > + > + Returns: > + version if found, else 1 > + """ > + regex = re.compile('^[vV](\d+)$') > + m = _parse_prefixes(subject_prefixes, regex) > + if m: > + return int(m.group(1)) > + > + m = re.search(r'\([vV](\d+)\)', subject) > + if m: > + return int(m.group(1)) > + > + return 1 > + > + > def find_content(project, mail): > """Extract a comment and potential diff from a mail.""" > patchbuf = None > @@ -622,6 +692,7 @@ def parse_mail(mail, list_id=None): > author = find_author(mail) > name, prefixes = clean_subject(mail.get('Subject'), [project.linkname]) > x, n = parse_series_marker(prefixes) > + version = parse_version(name, prefixes) > refs = find_references(mail) > date = find_date(mail) > headers = find_headers(mail) > @@ -638,9 +709,22 @@ def parse_mail(mail, list_id=None): > filenames = find_filenames(diff) > delegate = auto_delegate(project, filenames) > > + series = find_series(mail) > + if not series and n: # the series markers indicates a series > + series = SeriesRevision(date=date, > + submitter=author, > + version=version, > + total=n) > + series.save() > + > + for ref in refs + [msgid]: # save references for series > + # we don't want duplicates > + SeriesReference.objects.get_or_create(series=series, > msgid=ref) > + > patch = Patch( > msgid=msgid, > project=project, > + series=series, > name=name, > date=date, > headers=headers, > @@ -674,9 +758,32 @@ def parse_mail(mail, list_id=None): > if is_cover_letter: > author.save() > > + # we don't use 'find_series' here as a cover letter will > + # always be the first item in a thread, thus the references > + # could only point to a different series or unrelated > + # message > + try: > + series = SeriesReference.objects.get(msgid=msgid).series > + except SeriesReference.DoesNotExist: > + series = None > + > + if not series: > + series = SeriesRevision(date=date, > + submitter=author, > + version=version, > + total=n) > + series.save() > + > + # we don't save the in-reply-to or references fields > + # for a cover letter, as they can't refer to the same > + # series > + SeriesReference.objects.get_or_create(series=series, > + msgid=msgid) > + > cover_letter = CoverLetter( > msgid=msgid, > project=project, > + series=series, > name=name, > date=date, > headers=headers, > diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py > index 7b5c71b..4783e0f 100644 > --- a/patchwork/tests/test_parser.py > +++ b/patchwork/tests/test_parser.py > @@ -34,11 +34,14 @@ from patchwork.parser import clean_subject > from patchwork.parser import find_author > from patchwork.parser import find_content > from patchwork.parser import find_project_by_header > +from patchwork.parser import find_series > from patchwork.parser import parse_mail as _parse_mail > from patchwork.parser import parse_pull_request > from patchwork.parser import parse_series_marker > +from patchwork.parser import parse_version > from patchwork.parser import split_prefixes > from patchwork.tests.utils import create_project > +from patchwork.tests.utils import create_series_reference > from patchwork.tests.utils import create_state > from patchwork.tests.utils import create_user > from patchwork.tests.utils import read_patch > @@ -323,6 +326,72 @@ class SenderCorrelationTest(TestCase): > self.assertEqual(person_b.id, person_a.id) > > > +class SeriesCorrelationTest(TestCase): > + """Validate correct behavior of find_series.""" > + > + @staticmethod > + def _create_email(msgid, references=None): > + """Create a sample mail. > + > + Arguments: > + msgid (str): The message's msgid > + references (list): A list of preceding messages' msgids, > + oldest first > + """ > + mail = 'Message-Id: %s\n' % msgid + \ > + 'From: example user <[email protected]>\n' + \ > + 'Subject: Tests\n' > + > + if references: > + mail += 'In-Reply-To: %s\n' % references[-1] > + mail += 'References: %s\n' % '\n\t'.join(references) > + > + mail += 'test\n\n' + SAMPLE_DIFF > + return message_from_string(mail) > + > + def test_new_series(self): > + msgid = make_msgid() > + email = self._create_email(msgid) > + > + self.assertIsNone(find_series(email)) > + > + def test_first_reply(self): > + msgid_a = make_msgid() > + msgid_b = make_msgid() > + email = self._create_email(msgid_b, [msgid_a]) > + > + # assume msgid_a was already handled > + ref = create_series_reference(msgid=msgid_a) > + > + series = find_series(email) > + self.assertEqual(series, ref.series) > + > + def test_nested_series(self): > + """Handle a series sent in-reply-to an existing series.""" > + # create an old series with a "cover letter" > + msgids = [make_msgid()] > + ref_v1 = create_series_reference(msgid=msgids[0]) > + > + # ...and three patches > + for i in range(3): > + msgids.append(make_msgid()) > + create_series_reference(msgid=msgids[-1], > + series=ref_v1.series) > + > + # now create a new series with "cover letter" > + msgids.append(make_msgid()) > + ref_v2 = create_series_reference(msgid=msgids[-1]) > + > + # ...and the "first patch" of this new series > + msgid = make_msgid() > + email = self._create_email(msgid, msgids) > + series = find_series(email) > + > + # this should link to the second series - not the first > + self.assertEqual(len(msgids), 4 + 1) # old series + new cover > + self.assertEqual(series, ref_v2.series) > + > + > class SubjectEncodingTest(TestCase): > """Validate correct handling of encoded subjects.""" > > @@ -658,24 +727,6 @@ class ParseInitialTagsTest(PatchTest): > tag__name='Tested-by').count, 1) > > > -class PrefixTest(TestCase): > - > - def test_split_prefixes(self): > - self.assertEqual(split_prefixes('PATCH'), ['PATCH']) > - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) > - self.assertEqual(split_prefixes(''), []) > - self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) > - self.assertEqual(split_prefixes('PATCH '), ['PATCH']) > - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) > - self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) > - > - def test_series_markers(self): > - self.assertEqual(parse_series_marker([]), (None, None)) > - self.assertEqual(parse_series_marker(['bar']), (None, None)) > - self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) > - self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) > - > - > class SubjectTest(TestCase): > > def test_clean_subject(self): > @@ -706,3 +757,27 @@ class SubjectTest(TestCase): > ('[bar] meep', ['bar'])) > self.assertEqual(clean_subject('[FOO] [bar] meep', ['foo']), > ('[bar] meep', ['bar'])) > + def test_split_prefixes(self): > + self.assertEqual(split_prefixes('PATCH'), ['PATCH']) > + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) > + self.assertEqual(split_prefixes(''), []) > + self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) > + self.assertEqual(split_prefixes('PATCH '), ['PATCH']) > + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) > + self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) > + > + def test_series_markers(self): > + self.assertEqual(parse_series_marker([]), (None, None)) > + self.assertEqual(parse_series_marker(['bar']), (None, None)) > + self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) > + self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) > + > + def test_version(self): > + self.assertEqual(parse_version('', []), 1) > + self.assertEqual(parse_version('Hello, world', []), 1) > + self.assertEqual(parse_version('Hello, world', ['version']), 1) > + self.assertEqual(parse_version('Hello, world', ['v2']), 2) > + self.assertEqual(parse_version('Hello, world', ['V6']), 6) > + self.assertEqual(parse_version('Hello, world', ['v10']), 10) > + self.assertEqual(parse_version('Hello, world (v2)', []), 2) > + self.assertEqual(parse_version('Hello, world (V6)', []), 6) > diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py > index 29455d2..cf30dac 100644 > --- a/patchwork/tests/utils.py > +++ b/patchwork/tests/utils.py > @@ -31,6 +31,9 @@ from patchwork.models import CoverLetter > from patchwork.models import Patch > from patchwork.models import Person > from patchwork.models import Project > +from patchwork.models import Series > +from patchwork.models import SeriesReference > +from patchwork.models import SeriesRevision > from patchwork.models import State > > SAMPLE_DIFF = """--- /dev/null 2011-01-01 00:00:00.000000000 +0800 > @@ -217,6 +220,35 @@ def create_check(**kwargs): > return Check.objects.create(**values) > > > +def create_series(**kwargs): > + """Create 'Series' object.""" > + return Series.objects.create(**kwargs) > + > + > +def create_series_revision(**kwargs): > + """Create 'SeriesRevision' object.""" > + values = { > + 'group': create_series() if 'group' not in kwargs else None, > + 'date': dt.now(), > + 'submitter': create_person() if 'submitter' not in kwargs else None, > + 'total': 1, > + } > + values.update(**kwargs) > + > + return SeriesRevision.objects.create(**values) > + > + > +def create_series_reference(**kwargs): > + """Create 'SeriesReference' object.""" > + values = { > + 'series': create_series_revision() if 'series' not in kwargs else > None, > + 'msgid': make_msgid(), > + } > + values.update(**kwargs) > + > + return SeriesReference.objects.create(**values) > + > + > def _create_submissions(create_func, count=1, **kwargs): > """Create 'count' Submission-based objects. > > _______________________________________________ Patchwork mailing list [email protected] https://lists.ozlabs.org/listinfo/patchwork
