Re: [PATCH v3 3/7] parser: Add series parsing

2016-09-22 Thread Andy Doan
On 09/12/2016 04:53 PM, Stephen Finucane wrote:
> It is now possible to parse and store series, so do just that.
> The parsing at the moment is based on both RFC822 headers and
> subject lines.
> 
> Signed-off-by: Stephen Finucane 

Thanks for the tests!

Reviewed-by: Andy Doan 

> ---
> v3:
> - Rework how nested series are handled once again
> - Don't search for references when creating a cover letter
> v2:
> - Rebase onto master, moving changes into 'parser'
> - Add unit tests
> - Merge "Handle 'xxx (v2)' style messages" patch
> - Merge "Handle series sent 'in-reply-to'" patch
> - Handle capitalized version prefixes like [V2]
> - Trivial cleanup of some parser functions
> ---
>  patchwork/models.py|   7 ++-
>  patchwork/parser.py| 127 +
>  patchwork/tests/test_parser.py | 111 +--
>  patchwork/tests/utils.py   |  32 +++
>  4 files changed, 247 insertions(+), 30 deletions(-)
> 
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 2875369..52325d2 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -230,8 +230,11 @@ class SeriesRevision(models.Model):
>  try:
>  return self.cover_letter.name
>  except CoverLetter.DoesNotExist:
> -return '[Series #%d, revision #%d]' % (self.group.id,
> -   self.version)
> +if self.group:
> +return '[Series #%d, revision #%d]' % (self.group.id,
> +   self.version)
> +else:
> +return '[Untitled series]'
>  
>  @property
>  def actual_total(self):
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 1805df8..d20ac19 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -21,8 +21,10 @@
>  
>  import codecs
>  import datetime
> -from email.header import Header, decode_header
> -from email.utils import parsedate_tz, mktime_tz
> +from email.header import Header
> +from email.header import decode_header
> +from email.utils import parsedate_tz
> +from email.utils import mktime_tz
>  from fnmatch import fnmatch
>  from functools import reduce
>  import logging
> @@ -33,9 +35,17 @@ from django.contrib.auth.models import User
>  from django.utils import six
>  from django.utils.six.moves import map
>  
> -from patchwork.models import (Patch, Project, Person, Comment, State,
> -  DelegationRule, Submission, CoverLetter,
> -  get_default_initial_patch_state)
> +from patchwork.models import Comment
> +from patchwork.models import CoverLetter
> +from patchwork.models import DelegationRule
> +from patchwork.models import get_default_initial_patch_state
> +from patchwork.models import Patch
> +from patchwork.models import Person
> +from patchwork.models import Project
> +from patchwork.models import SeriesRevision
> +from patchwork.models import SeriesReference
> +from patchwork.models import State
> +from patchwork.models import Submission
>  
>  
>  _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> @@ -100,6 +110,34 @@ def find_project_by_header(mail):
>  return project
>  
>  
> +def find_series(mail):
> +"""Find a patch's `SeriesRevision`.
> +
> +Traverse RFC822 headers, starting with most recent first, to find
> +ancestors and the related series. Headers are traversed in reverse
> +to handle series sent in reply to previous series, like so:
> +
> +[PATCH 0/3] A cover letter
> +  [PATCH 1/3] The first patch
> +  ...
> +  [PATCH v2 0/3] A cover letter
> +[PATCH v2 1/3] The first patch
> +...
> +
> +Args:
> +mail (email.message.Message): The mail to extract series from
> +
> +Returns:
> +The matching `SeriesRevision` instance, if any
> +"""
> +for ref in find_references(mail, True):
> +# try parsing by RFC5322 fields first
> +try:
> +return SeriesReference.objects.get(msgid=ref).series
> +except SeriesReference.DoesNotExist:
> +pass
> +
> +
>  def find_author(mail):
>  from_header = clean_header(mail.get('From'))
>  name, email = (None, None)
> @@ -161,10 +199,13 @@ def find_headers(mail):
> for (k, v) in list(mail.items())])
>  
>  
> -def find_references(mail):
> +def find_references(mail, include_msgid=False):
>  """Construct a list of possible reply message ids."""
>  refs = []
>  
> +if include_msgid:
> +refs.append(mail.get('Message-ID'))
> +
>  if 'In-Reply-To' in mail:
>  refs.append(mail.get('In-Reply-To'))
>  
> @@ -178,6 +219,13 @@ def find_references(mail):
>  return refs
>  
>  
> +def _parse_prefixes(subject_prefixes, regex):
> +for prefix in subject_prefixes:
> + 

[PATCH v3 3/7] parser: Add series parsing

2016-09-12 Thread Stephen Finucane
It is now possible to parse and store series, so do just that.
The parsing at the moment is based on both RFC822 headers and
subject lines.

Signed-off-by: Stephen Finucane 
---
v3:
- Rework how nested series are handled once again
- Don't search for references when creating a cover letter
v2:
- Rebase onto master, moving changes into 'parser'
- Add unit tests
- Merge "Handle 'xxx (v2)' style messages" patch
- Merge "Handle series sent 'in-reply-to'" patch
- Handle capitalized version prefixes like [V2]
- Trivial cleanup of some parser functions
---
 patchwork/models.py|   7 ++-
 patchwork/parser.py| 127 +
 patchwork/tests/test_parser.py | 111 +--
 patchwork/tests/utils.py   |  32 +++
 4 files changed, 247 insertions(+), 30 deletions(-)

diff --git a/patchwork/models.py b/patchwork/models.py
index 2875369..52325d2 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -230,8 +230,11 @@ class SeriesRevision(models.Model):
 try:
 return self.cover_letter.name
 except CoverLetter.DoesNotExist:
-return '[Series #%d, revision #%d]' % (self.group.id,
-   self.version)
+if self.group:
+return '[Series #%d, revision #%d]' % (self.group.id,
+   self.version)
+else:
+return '[Untitled series]'
 
 @property
 def actual_total(self):
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 1805df8..d20ac19 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -21,8 +21,10 @@
 
 import codecs
 import datetime
-from email.header import Header, decode_header
-from email.utils import parsedate_tz, mktime_tz
+from email.header import Header
+from email.header import decode_header
+from email.utils import parsedate_tz
+from email.utils import mktime_tz
 from fnmatch import fnmatch
 from functools import reduce
 import logging
@@ -33,9 +35,17 @@ from django.contrib.auth.models import User
 from django.utils import six
 from django.utils.six.moves import map
 
-from patchwork.models import (Patch, Project, Person, Comment, State,
-  DelegationRule, Submission, CoverLetter,
-  get_default_initial_patch_state)
+from patchwork.models import Comment
+from patchwork.models import CoverLetter
+from patchwork.models import DelegationRule
+from patchwork.models import get_default_initial_patch_state
+from patchwork.models import Patch
+from patchwork.models import Person
+from patchwork.models import Project
+from patchwork.models import SeriesRevision
+from patchwork.models import SeriesReference
+from patchwork.models import State
+from patchwork.models import Submission
 
 
 _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
@@ -100,6 +110,34 @@ def find_project_by_header(mail):
 return project
 
 
+def find_series(mail):
+"""Find a patch's `SeriesRevision`.
+
+Traverse RFC822 headers, starting with most recent first, to find
+ancestors and the related series. Headers are traversed in reverse
+to handle series sent in reply to previous series, like so:
+
+[PATCH 0/3] A cover letter
+  [PATCH 1/3] The first patch
+  ...
+  [PATCH v2 0/3] A cover letter
+[PATCH v2 1/3] The first patch
+...
+
+Args:
+mail (email.message.Message): The mail to extract series from
+
+Returns:
+The matching `SeriesRevision` instance, if any
+"""
+for ref in find_references(mail, True):
+# try parsing by RFC5322 fields first
+try:
+return SeriesReference.objects.get(msgid=ref).series
+except SeriesReference.DoesNotExist:
+pass
+
+
 def find_author(mail):
 from_header = clean_header(mail.get('From'))
 name, email = (None, None)
@@ -161,10 +199,13 @@ def find_headers(mail):
for (k, v) in list(mail.items())])
 
 
-def find_references(mail):
+def find_references(mail, include_msgid=False):
 """Construct a list of possible reply message ids."""
 refs = []
 
+if include_msgid:
+refs.append(mail.get('Message-ID'))
+
 if 'In-Reply-To' in mail:
 refs.append(mail.get('In-Reply-To'))
 
@@ -178,6 +219,13 @@ def find_references(mail):
 return refs
 
 
+def _parse_prefixes(subject_prefixes, regex):
+for prefix in subject_prefixes:
+m = regex.match(prefix)
+if m:
+return m
+
+
 def parse_series_marker(subject_prefixes):
 """Extract series markers from subject.
 
@@ -193,14 +241,36 @@ def parse_series_marker(subject_prefixes):
 """
 
 regex = re.compile('^([0-9]+)/([0-9]+)$')
-for prefix in subject_prefixes:
-m = regex.match(prefix)
-if not m:
-continue