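It is now possible to parse and store series, so do just that. At the moment, parsing is based on both RFC822 headers (the Message-ID and any referenced message IDs, used to find an existing series) and subject-line prefixes (the "x/n" series marker and the "vN" version).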
Signed-off-by: Stephen Finucane <[email protected]> --- patchwork/bin/parsemail.py | 112 ++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 102 insertions(+), 10 deletions(-) diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py index 8648d29..1c7ce0f 100755 --- a/patchwork/bin/parsemail.py +++ b/patchwork/bin/parsemail.py @@ -25,8 +25,10 @@ import argparse import codecs import datetime from email import message_from_file -from email.header import Header, decode_header -from email.utils import parsedate_tz, mktime_tz +from email.header import Header +from email.header import decode_header +from email.utils import parsedate_tz +from email.utils import mktime_tz from fnmatch import fnmatch from functools import reduce import logging @@ -41,10 +43,20 @@ from django.utils.log import AdminEmailHandler from django.utils import six from django.utils.six.moves import map -from patchwork.models import (Patch, Project, Person, Comment, State, - DelegationRule, Submission, CoverLetter, - get_default_initial_patch_state) -from patchwork.parser import parse_patch, patch_get_filenames +from patchwork.models import Comment +from patchwork.models import CoverLetter +from patchwork.models import DelegationRule +from patchwork.models import get_default_initial_patch_state +from patchwork.models import Patch +from patchwork.models import Person +from patchwork.models import Project +from patchwork.models import SeriesRevision +from patchwork.models import SeriesReference +from patchwork.models import State +from patchwork.models import Submission + +from patchwork.parser import parse_patch +from patchwork.parser import patch_get_filenames LOGGER = logging.getLogger(__name__) @@ -114,6 +126,31 @@ def find_project_by_header(mail): return project +def find_series(mail): + """Find a patch's `SeriesRevision`. 
+ + Args: + mail (email.message.Message): The mail to extract series from + + Returns: + The matching `SeriesRevision` instance, if any + """ + series = None + + for ref in find_references(mail) + [mail.get('Message-ID').strip()]: + # try parsing by RFC5322 fields first + try: + series_ref = SeriesReference.objects.get(msgid=ref) + series = series_ref.series + except SeriesReference.DoesNotExist: + pass + + if series: + break + + return series + + def find_author(mail): from_header = clean_header(mail.get('From')) @@ -202,6 +239,13 @@ def find_references(mail): return refs +def _parse_prefixes(subject_prefixes, regex): + for prefix in subject_prefixes: + m = regex.match(prefix) + if m: + return m + + def parse_series_marker(subject_prefixes): """Extract series markers from subject. @@ -217,14 +261,31 @@ def parse_series_marker(subject_prefixes): """ regex = re.compile('^([0-9]+)/([0-9]+)$') - for prefix in subject_prefixes: - m = regex.match(prefix) - if not m: - continue + m = _parse_prefixes(subject_prefixes, regex) + if m: return (int(m.group(1)), int(m.group(2))) + return (None, None) +def parse_version(subject_prefixes): + """Extract patch version. 
+ + Args: + subject_prefixes: List of subject prefixes to extract version + from + + Returns: + version if found, else 1 + """ + regex = re.compile('^v([0-9]+)$') + m = _parse_prefixes(subject_prefixes, regex) + if m: + return int(m.group(1)) + + return 1 + + def find_content(project, mail): patchbuf = None commentbuf = '' @@ -481,9 +542,11 @@ def parse_mail(mail, list_id=None): author = find_author(mail) name, prefixes = clean_subject(mail.get('Subject'), [project.linkname]) x, n = parse_series_marker(prefixes) + version = parse_version(prefixes) refs = find_references(mail) date = find_date(mail) headers = find_headers(mail) + # TODO(stephenfin) This should have a 'parse_' prefix pull_url = find_pull_request(message) # build objects @@ -497,9 +560,24 @@ def parse_mail(mail, list_id=None): filenames = patch_get_filenames(diff) delegate = auto_delegate(project, filenames) + # TODO(stephenfin) Eventually this should be moved to a function + series = find_series(mail) + if not series and n: # the series markers indicates a series + series = SeriesRevision(date=date, + submitter=author, + version=version, + total=n) + series.save() + + for ref in refs + [msgid]: # save references for series + series_ref = SeriesReference(series=series, + msgid=ref) + series_ref.save() + patch = Patch( msgid=msgid, project=project, + series=series, name=name, date=date, headers=headers, @@ -529,9 +607,23 @@ def parse_mail(mail, list_id=None): if is_cover_letter: author.save() + series = find_series(mail) + if not series: + series = SeriesRevision(date=date, + submitter=author, + version=version, + total=n) + series.save() + + for ref in refs + [msgid]: # save references for series + series_ref = SeriesReference(series=series, + msgid=ref) + series_ref.save() + cover_letter = CoverLetter( msgid=msgid, project=project, + series=series, name=name, date=date, headers=headers, -- 1.7.4.1 _______________________________________________ Patchwork mailing list [email protected] 
https://lists.ozlabs.org/listinfo/patchwork
