Hi folks,

We're going to use Patchwork to track Linaro patches, but instead of
subscribing to all mailing lists to which patches may be sent, we're
asking all Linaro developers to CC a common email address whenever they
send a patch upstream, and we'll then feed all mail delivered to that
address into Patchwork.

That means most of the messages won't have a List-ID header, but we
should still be able to look up the correct Patchwork project for a given
message based on the email addresses of recipients, hence I'd like to
propose changing find_project() in apps/patchwork/bin/parsemail.py to
fall back to email address lookup when a project can't be found with the
List-ID in the message.

I think such a change wouldn't cause any harm to regular Patchwork
instances and may even be helpful if a mailing list's list-id is
changed.  I'm including a patch which does what I've described above,
just to illustrate, but if you guys think this is a sane change, I'll be
happy to send a properly formatted patch (as soon as I figure out how to
make git combine the patch and the cover letter in a single email ;).

Cheers,
Guilherme


diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index 700cb6f..305ab95 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -55,7 +55,7 @@ def clean_header(header):
 
     return normalise_space(u' '.join(fragments))
 
-def find_project(mail):
+def find_project_by_listid(mail):
     project = None
     listid_res = [re.compile('.*<([^>]+)>.*', re.S),
                   re.compile('^([\S]+)$', re.S)]
@@ -81,6 +81,30 @@ def find_project(mail):
 
     return project
 
+def extract_email_addresses(str):
+    email_re = re.compile(
+        r"([_\.0-9a-zA-Z-+=]+@(([0-9a-zA-Z-]{1,}\.)*)[a-zA-Z]{2,})")
+    # re.findall() will return a list of tuples because we have multiple
+    # groups on the regex above, but we're only interested on the outermost
+    # group (which should contain the whole email address), so we drop the
+    # second and third groups.
+    return [email for email, dummy, dummy2 in email_re.findall(str)]
+
+def find_project_by_list_address(mail):
+    recipients = mail.get('To', '') + mail.get('CC', '')
+    for email_address in extract_email_addresses(recipients):
+        try:
+            return Project.objects.get(listemail = email_address)
+        except Project.DoesNotExist:
+            pass
+    return None
+
+def find_project(mail):
+    project = find_project_by_listid(mail)
+    if project is None:
+        project = find_project_by_list_address(mail)
+    return project
+
 def find_author(mail):
 
     from_header = clean_header(mail.get('From'))
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index ff0025a..d4d15d1 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -34,8 +34,9 @@ class PatchTest(unittest.TestCase):
     default_subject = defaults.subject
     project = defaults.project
 
-from patchwork.bin.parsemail import find_content, find_author, find_project, \
-                                    parse_mail
+from patchwork.bin.parsemail import (
+    extract_email_addresses, find_content, find_author, find_project,
+    parse_mail)
 
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
@@ -276,18 +277,44 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
             # and the one we parsed in setUp()
             self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
 
-class ListIdHeaderTest(unittest.TestCase):
-    """ Test that we parse List-Id headers from mails correctly """
+class EmailProjectGuessing(unittest.TestCase):
+    """Projects are guessed based on List-Id headers or recipient addresses"""
     def setUp(self):
         self.project = Project(linkname = 'test-project-1', name = 'Project 1',
                 listid = '1.example.com', listemail='[email protected]')
         self.project.save()
 
+    def testExtractingEmailAddressesFromRecipientsList(self):
+        emails = extract_email_addresses(
+            '"Foo Bar" <[email protected]>,'
+            '<[email protected]>,'
+            '[email protected],'
+            # Notice that this one is not a valid email address.
+            '[email protected]')
+        self.assertEqual(
+            ['[email protected]',
+             '[email protected]',
+             '[email protected]'],
+            emails)
+
     def testNoListId(self):
         email = MIMEText('')
         project = find_project(email)
         self.assertEquals(project, None)
 
+    def testNoListIdWithListEmailAsRecipient(self):
+        email = MIMEText('')
+        email['To'] = '"First dev list" <[email protected]>'
+        project = find_project(email)
+        self.assertEquals(self.project, project)
+
+    def testNoListIdWithListEmailAsCC(self):
+        email = MIMEText('')
+        email['CC'] = ('"First maintainer <[email protected]>, '
+                       '"First dev list" <[email protected]>')
+        project = find_project(email)
+        self.assertEquals(self.project, project)
+
     def testBlankListId(self):
         email = MIMEText('')
         email['List-Id'] = ''


-- 
Guilherme Salgado <https://launchpad.net/~salgado>

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
Patchwork mailing list
[email protected]
https://lists.ozlabs.org/listinfo/patchwork

Reply via email to