- install python-afl in Docker (py2 doesn't seem to work) - change parser to raise BrokenEmailException instead of ValueError when a mail is malformed. This lets us ignore broken emails while still noticing other sorts of ValueError.
- fuzz management command to be used in py-afl-fuzz Signed-off-by: Daniel Axtens <d...@axtens.net> --- patchwork/management/commands/fuzz.py | 88 +++++++++++++++++++++++++++++++++++ patchwork/parser.py | 18 ++++--- patchwork/tests/test_parser.py | 5 +- tools/docker/Dockerfile | 2 + tools/fuzzer_dict | 52 +++++++++++++++++++++ 5 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 patchwork/management/commands/fuzz.py create mode 100644 tools/fuzzer_dict diff --git a/patchwork/management/commands/fuzz.py b/patchwork/management/commands/fuzz.py new file mode 100644 index 000000000000..c2c08bcfbec2 --- /dev/null +++ b/patchwork/management/commands/fuzz.py @@ -0,0 +1,88 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2016 Stephen Finucane <step...@that.guru> +# +# This file is part of the Patchwork package. +# +# Patchwork is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Patchwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Patchwork; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import email +import logging + +from django.core.management import base +from django.utils import six + +from patchwork.models import Person +from patchwork.models import Patch +from patchwork.models import Series +from patchwork.models import CoverLetter +from patchwork.models import Comment +from patchwork.models import SeriesReference +from patchwork.parser import parse_mail +from patchwork.parser import BrokenEmailException + +import afl +afl.init() + +logger = logging.getLogger(__name__) + + +class Command(base.BaseCommand): + help = 'Parse an mbox file and store any patch/comment found.' + + def add_arguments(self, parser): + parser.add_argument( + 'infile', + nargs=1, + type=str, + help='input mbox file') + parser.add_argument( + '--list-id', + help='mailing list ID. If not supplied, this will be ' + 'extracted from the mail headers.') + + def cleanup(self): + Series.objects.all().delete() + SeriesReference.objects.all().delete() + Patch.objects.all().delete() + Comment.objects.all().delete() + CoverLetter.objects.all().delete() + Person.objects.all().delete() + + def handle(self, *args, **options): + infile = options['infile'][0] + + logger.info('Parsing mail loaded by filename') + try: + if six.PY3: + with open(infile, 'rb') as file_: + mail = email.message_from_binary_file(file_) + else: + with open(infile) as file_: + mail = email.message_from_file(file_) + except AttributeError: + logger.warning("Broken email ignored") + return + + try: + parse_mail(mail, options['list_id']) + self.cleanup() + except BrokenEmailException: + logger.warning("Broken email ignored") + self.cleanup() + except Exception as E: + logger.exception('Error when parsing incoming email', + extra={'mail': mail.as_string()}) + self.cleanup() + raise E diff --git 
a/patchwork/parser.py b/patchwork/parser.py index 46e6ca161574..0000eaeafa6f 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -54,6 +54,10 @@ SERIES_DELAY_INTERVAL = 10 logger = logging.getLogger(__name__) +class BrokenEmailException(Exception): + pass + + def normalise_space(value): whitespace_re = re.compile(r'\s+') return whitespace_re.sub(' ', value).strip() @@ -293,7 +297,7 @@ def find_author(mail): from_header = clean_header(mail.get('From')) if not from_header: - raise ValueError("Invalid 'From' header") + raise BrokenEmailException("Invalid 'From' header") name, email = (None, None) @@ -324,7 +328,7 @@ def find_author(mail): break if not email: - raise ValueError("Invalid 'From' header") + raise BrokenEmailException("Invalid 'From' header") email = email.strip() if name is not None: @@ -627,7 +631,7 @@ def clean_subject(subject, drop_prefixes=None): subject = clean_header(subject) if not subject: - raise ValueError("Invalid 'Subject' header") + raise BrokenEmailException("Invalid 'Subject' header") if drop_prefixes is None: drop_prefixes = [] @@ -908,13 +912,13 @@ def parse_mail(mail, list_id=None): """ # some basic sanity checks if 'From' not in mail: - raise ValueError("Missing 'From' header") + raise BrokenEmailException("Missing 'From' header") if 'Subject' not in mail: - raise ValueError("Missing 'Subject' header") + raise BrokenEmailException("Missing 'Subject' header") if 'Message-Id' not in mail: - raise ValueError("Missing 'Message-Id' header") + raise BrokenEmailException("Missing 'Message-Id' header") hint = clean_header(mail.get('X-Patchwork-Hint', '')) if hint and hint.lower() == 'ignore': @@ -934,7 +938,7 @@ def parse_mail(mail, list_id=None): msgid = clean_header(mail.get('Message-Id')) if not msgid: - raise ValueError("Broken 'Message-Id' header") + raise BrokenEmailException("Broken 'Message-Id' header") msgid = msgid.strip()[:255] author = find_author(mail) diff --git a/patchwork/tests/test_parser.py 
b/patchwork/tests/test_parser.py index 80a559ec871b..19a9f856c0a8 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -43,6 +43,7 @@ from patchwork.parser import parse_series_marker from patchwork.parser import parse_version from patchwork.parser import split_prefixes from patchwork.parser import subject_check +from patchwork.parser import BrokenEmailException from patchwork.tests import TEST_MAIL_DIR from patchwork.tests import TEST_FUZZ_DIR from patchwork.tests.utils import create_project @@ -236,7 +237,7 @@ class SenderEncodingTest(TestCase): def test_empty(self): email = self._create_email('') - with self.assertRaises(ValueError): + with self.assertRaises(BrokenEmailException): find_author(email) def test_ascii_encoding(self): @@ -838,7 +839,7 @@ class FuzzTest(TestCase): m = load_mail(file_path) try: parse_mail(m, list_id="patchwork.ozlabs.org") - except ValueError: + except BrokenEmailException: pass @skipIf(six.PY2, "breaks only on python3") diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index ff05707a6049..266603e3bdcf 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -48,6 +48,8 @@ RUN cat /tmp/bashrc >> /home/patchwork/.bashrc COPY tools/docker/entrypoint.sh /usr/local/bin/entrypoint.sh +RUN apt-get install -y cython cython3; pip3 install python-afl; pip install python-afl + ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] USER patchwork WORKDIR /home/patchwork/patchwork diff --git a/tools/fuzzer_dict b/tools/fuzzer_dict new file mode 100644 index 000000000000..3aef26dc6154 --- /dev/null +++ b/tools/fuzzer_dict @@ -0,0 +1,52 @@ +mid1="Message-ID:" +mid2="Message-Id:" +from1="From:" +from2="From " +to="To:" +cc="CC:" +cc2="Cc:" +subject="Subject:" +date="Date:" +ct="Content-Type:" +mime="This is a multi-part message in MIME format." 
+tp="text/plain;" +csiso="charset=ISO-8859-1; " +csutf="charset=\"utf-8\"; " +utf8="Rafa\x25\x82 Mi\xc5\x82ecki <zaj...@gmail.com>" +sob="Signed-off-by: " +gitb="--- " +cvsb="===" +rb="Reviewed-by: " +ab="Acked-by: " +utf8enc="=?utf-8?b?UmFmYcWCIE1pxYJlY2tp?=" +utf8brokenenc="=?UTF-8?q?Rafa=FF=FF=20Mi=FF=FFecki?=" +diff="diff " +index1="index " +index2="Index: " +list1="List-ID" +list2="X-Mailing-List" +list3="X-list" +irt="In-Reply-To:" +ref="References:" +of=" of " +xpatch="x-patch" +xdiff="x-diff" +text="text" +plain="plain" +patch="PATCH" +re="Re:" +fwd="Fwd:" +sig="-- " +foot="_____" +atat="@@" +rename1="rename from " +rename2="rename to " +plus="+++ " +nnl="\\ No newline at end of file" +pr1="The following changes since commit" +pr2="are available in the git repository at:\x0a" +xps="X-Patchwork-State:" +xpd="X-Patchwork-Delegate:" +xph="X-Patchwork-Hint:" +ignore="ignore" +devnull="/dev/null" \ No newline at end of file -- 2.11.0 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork