Hi folks,

We're going to use Patchwork to track Linaro patches. Instead of subscribing to every mailing list to which patches may be sent, we're asking all Linaro developers to CC a common email address whenever they send a patch upstream, and we'll then feed all mail delivered to that address into Patchwork.
That means most of the messages won't have a List-ID header, but we
should still be able to look up the correct Patchwork project for a given
message based on the email addresses of its recipients. Hence I'd like to
propose changing find_project() in apps/patchwork/bin/parsemail.py to
fall back to an email address lookup when a project can't be found with the
List-ID in the message.
I think such a change wouldn't cause any harm to regular Patchwork
instances and may even be helpful if a mailing list's list-id is
changed. I'm including a patch which does what I've described above,
just to illustrate, but if you guys think this is a sane change, I'll be
happy to send a properly formatted patch (as soon as I figure out how to
make git combine the patch and the cover letter in a single email ;).
Cheers,
Guilherme
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index 700cb6f..305ab95 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -55,7 +55,7 @@ def clean_header(header):
return normalise_space(u' '.join(fragments))
-def find_project(mail):
+def find_project_by_listid(mail):
project = None
listid_res = [re.compile('.*<([^>]+)>.*', re.S),
re.compile('^([\S]+)$', re.S)]
@@ -81,6 +81,30 @@ def find_project(mail):
return project
+def extract_email_addresses(str):
+ email_re = re.compile(
+ r"([_\.0-9a-zA-Z-+=]+@(([0-9a-zA-Z-]{1,}\.)*)[a-zA-Z]{2,})")
+ # re.findall() will return a list of tuples because we have multiple
+ # groups on the regex above, but we're only interested on the outermost
+ # group (which should contain the whole email address), so we drop the
+ # second and third groups.
+ return [email for email, dummy, dummy2 in email_re.findall(str)]
+
+def find_project_by_list_address(mail):
+ recipients = mail.get('To', '') + mail.get('CC', '')
+ for email_address in extract_email_addresses(recipients):
+ try:
+ return Project.objects.get(listemail = email_address)
+ except Project.DoesNotExist:
+ pass
+ return None
+
+def find_project(mail):
+ project = find_project_by_listid(mail)
+ if project is None:
+ project = find_project_by_list_address(mail)
+ return project
+
def find_author(mail):
from_header = clean_header(mail.get('From'))
diff --git a/apps/patchwork/tests/patchparser.py
b/apps/patchwork/tests/patchparser.py
index ff0025a..d4d15d1 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -34,8 +34,9 @@ class PatchTest(unittest.TestCase):
default_subject = defaults.subject
project = defaults.project
-from patchwork.bin.parsemail import find_content, find_author, find_project, \
- parse_mail
+from patchwork.bin.parsemail import (
+ extract_email_addresses, find_content, find_author, find_project,
+ parse_mail)
class InlinePatchTest(PatchTest):
patch_filename = '0001-add-line.patch'
@@ -276,18 +277,44 @@ class
MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
# and the one we parsed in setUp()
self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
-class ListIdHeaderTest(unittest.TestCase):
- """ Test that we parse List-Id headers from mails correctly """
+class EmailProjectGuessing(unittest.TestCase):
+ """Projects are guessed based on List-Id headers or recipient addresses"""
def setUp(self):
self.project = Project(linkname = 'test-project-1', name = 'Project 1',
listid = '1.example.com', listemail='[email protected]')
self.project.save()
+ def testExtractingEmailAddressesFromRecipientsList(self):
+ emails = extract_email_addresses(
+ '"Foo Bar" <[email protected]>,'
+ '<[email protected]>,'
+ '[email protected],'
+ # Notice that this one is not a valid email address.
+ '[email protected]')
+ self.assertEqual(
+ ['[email protected]',
+ '[email protected]',
+ '[email protected]'],
+ emails)
+
def testNoListId(self):
email = MIMEText('')
project = find_project(email)
self.assertEquals(project, None)
+ def testNoListIdWithListEmailAsRecipient(self):
+ email = MIMEText('')
+ email['To'] = '"First dev list" <[email protected]>'
+ project = find_project(email)
+ self.assertEquals(self.project, project)
+
+ def testNoListIdWithListEmailAsCC(self):
+ email = MIMEText('')
+ email['CC'] = ('"First maintainer <[email protected]>, '
+ '"First dev list" <[email protected]>')
+ project = find_project(email)
+ self.assertEquals(self.project, project)
+
def testBlankListId(self):
email = MIMEText('')
email['List-Id'] = ''
--
Guilherme Salgado <https://launchpad.net/~salgado>
signature.asc
Description: This is a digitally signed message part
_______________________________________________ Patchwork mailing list [email protected] https://lists.ozlabs.org/listinfo/patchwork
