As with parsemail, parsearchive makes more sense as a management command. Make it so.
As with the conversion of the 'parsemail' tool, this removes customisable logging as it's not necessary. Signed-off-by: Stephen Finucane <stephenfinuc...@hotmail.com> Closes-bug: #17 --- v6: - Add additional unit tests v4: - Add unit tests - Add support for Django 1.10 --- docs/development.md | 8 +- patchwork/bin/parsearchive.py | 106 -------------------------- patchwork/management/commands/parsearchive.py | 106 ++++++++++++++++++++++++++ patchwork/tests/test_management.py | 32 ++++++++ 4 files changed, 142 insertions(+), 110 deletions(-) delete mode 100755 patchwork/bin/parsearchive.py create mode 100644 patchwork/management/commands/parsearchive.py diff --git a/docs/development.md b/docs/development.md index e51f7b1..36d2fdf 100644 --- a/docs/development.md +++ b/docs/development.md @@ -268,8 +268,8 @@ using the aptly-named `createsuperuser` command: Once this is done, it's beneficial to load some real emails into the system. This can be done manually, however it's generally much easier to download -an archive from a Mailman instance and load these using the `parsearchive.py` -tool. You can do this like so: +an archive from a Mailman instance and load these using the `parsearchive` +command. You can do this like so: (.venv)$ mm_user=myusername (.venv)$ mm_pass=mypassword @@ -288,8 +288,8 @@ find more informations about this [here][ref-mman-bulk]. Load these archives into Patchwork. Depending on the size of the downloaded archives this may take some time: - (.venv)$ PYTHONPATH=. 
./patchwork/bin/parsearchive.py \ - --list-id=patchwork.ozlabs.org patchwork.mbox + (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \ + patchwork.mbox Finally, run the server and browse to the IP address of your board using your browser of choice: diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py deleted file mode 100755 index 8986b22..0000000 --- a/patchwork/bin/parsearchive.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# -# Patchwork - automated patch tracking system -# Copyright (C) 2015 Intel Corporation -# -# This file is part of the Patchwork package. -# -# Patchwork is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# Patchwork is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with Patchwork; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Utility to parse an mbox archive file.""" - -from __future__ import absolute_import - -import argparse -import logging -import mailbox - -import django - -from patchwork.parser import parse_mail -from patchwork import models - -LOGGER = logging.getLogger(__name__) - -VERBOSITY_LEVELS = { - 'debug': logging.DEBUG, - 'info': logging.INFO, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL -} - - -def parse_mbox(path, list_id): - results = { - models.Patch: 0, - models.CoverLetter: 0, - models.Comment: 0, - } - duplicates = 0 - dropped = 0 - - mbox = mailbox.mbox(path) - for msg in mbox: - try: - obj = parse_mail(msg, list_id) - if obj: - results[type(obj)] += 1 - else: - dropped += 1 - except django.db.utils.IntegrityError: - duplicates += 1 - print('Processed %(total)d messages -->\n' - ' %(covers)4d cover letters\n' - ' %(patches)4d patches\n' - ' %(comments)4d comments\n' - ' %(duplicates)4d duplicates\n' - ' %(dropped)4d dropped\n' - 'Total: %(new)s new entries' % { - 'total': len(mbox), - 'covers': results[models.CoverLetter], - 'patches': results[models.Patch], - 'comments': results[models.Comment], - 'duplicates': duplicates, - 'dropped': dropped, - 'new': len(mbox) - duplicates - dropped, - }) - - -def main(): - django.setup() - parser = argparse.ArgumentParser(description=__doc__) - - def list_logging_levels(): - """Give a summary of all available logging levels.""" - return sorted(VERBOSITY_LEVELS.keys(), - key=lambda x: VERBOSITY_LEVELS[x]) - - parser.add_argument('inpath', help='input mbox filename') - - group = parser.add_argument_group('Mail parsing configuration') - group.add_argument('--list-id', help='mailing list ID. 
If not supplied ' - 'this will be extracted from the mail headers.') - group.add_argument('--verbosity', choices=list_logging_levels(), - help='debug level', default='info') - - args = vars(parser.parse_args()) - - logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']]) - - parse_mbox(args['inpath'], args['list_id']) - -if __name__ == '__main__': - main() diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py new file mode 100644 index 0000000..310c6cf --- /dev/null +++ b/patchwork/management/commands/parsearchive.py @@ -0,0 +1,106 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2016 Stephen Finucane <stephenfinuc...@hotmail.com> +# +# This file is part of the Patchwork package. +# +# Patchwork is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Patchwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Patchwork; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import logging +import mailbox +from optparse import make_option +import os +import sys + +import django +from django.core.management.base import BaseCommand + +from patchwork import models +from patchwork.parser import parse_mail + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = 'Parse an mbox archive file and store any patches/comments found.' 
+ + if django.VERSION < (1, 8): + args = '<infile>' + option_list = BaseCommand.option_list + ( + make_option( + '--list-id', + help='mailing list ID. If not supplied, this will be ' + 'extracted from the mail headers.'), + ) + else: + def add_arguments(self, parser): + parser.add_argument( + 'infile', + help='input mbox filename') + parser.add_argument( + '--list-id', + help='mailing list ID. If not supplied, this will be ' + 'extracted from the mail headers.') + + def handle(self, *args, **options): + results = { + models.Patch: 0, + models.CoverLetter: 0, + models.Comment: 0, + } + duplicates = 0 + dropped = 0 + + # TODO(stephenfin): Support passing via stdin + infile = args[0] if args else options['infile'] + # the mailbox.mbox function will create a new mbox if the file + # does not exist. We must manually validate this. + if not os.path.exists(infile): + self.stdout.write('Invalid path: %s' % infile) + sys.exit(1) + + mbox = mailbox.mbox(infile) + count = len(mbox) + + logger.info('Parsing %d mails', count) + for i, msg in enumerate(mbox): + try: + obj = parse_mail(msg, options['list_id']) + if obj: + results[type(obj)] += 1 + else: + dropped += 1 + except django.db.utils.IntegrityError: + duplicates += 1 + + if (i % 10) == 0: + self.stdout.write('%06d/%06d\r' % (i, count), ending='') + self.stdout.flush() + + self.stdout.write( + 'Processed %(total)d messages -->\n' + ' %(covers)4d cover letters\n' + ' %(patches)4d patches\n' + ' %(comments)4d comments\n' + ' %(duplicates)4d duplicates\n' + ' %(dropped)4d dropped\n' + 'Total: %(new)s new entries' % { + 'total': count, + 'covers': results[models.CoverLetter], + 'patches': results[models.Patch], + 'comments': results[models.Comment], + 'duplicates': duplicates, + 'dropped': dropped, + 'new': count - duplicates - dropped, + }) diff --git a/patchwork/tests/test_management.py b/patchwork/tests/test_management.py index 6cd21e2..f2b968f 100644 --- a/patchwork/tests/test_management.py +++ 
b/patchwork/tests/test_management.py @@ -21,6 +21,7 @@ import os from django.core.management import call_command from django.test import TestCase +from django.utils.six import StringIO from patchwork import models from patchwork.tests import TEST_MAIL_DIR @@ -78,3 +79,34 @@ class ParsemailTest(TestCase): count = models.Patch.objects.filter(project=project.id).count() self.assertEqual(count, 1) + + +class ParsearchiveTest(TestCase): + + def test_invalid_path(self): + out = StringIO() + with self.assertRaises(SystemExit) as exc: + call_command('parsearchive', 'xyz123random', stdout=out) + self.assertEqual(exc.exception.code, 1) + + def test_missing_project_path(self): + out = StringIO() + path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox') + call_command('parsearchive', path, stdout=out) + + self.assertIn('Processed 1 messages -->', out.getvalue()) + self.assertIn('1 dropped', out.getvalue()) + + def test_valid_path(self): + project = utils.create_project() + utils.create_state() + + out = StringIO() + path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox') + call_command('parsearchive', path, list_id=project.listid, stdout=out) + + self.assertIn('Processed 1 messages -->', out.getvalue()) + self.assertIn('1 patches', out.getvalue()) + + count = models.Patch.objects.filter(project=project.id).count() + self.assertEqual(count, 1) -- 2.7.4 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork