Re: [PATCH 08/10] parse(mail|archive): handle early fail within email module
On Thu, 2017-06-29 at 00:06 +1000, Daniel Axtens wrote: > Andrew Donnellanwrites: > > > On 28/06/17 17:48, Daniel Axtens wrote: > > > Certain really messed up email messages can cause a failure within > > > the email module (at least on py3). Catch this. > > > > > > Signed-off-by: Daniel Axtens > > > --- > > > patchwork/management/commands/parsearchive.py | 9 > > > patchwork/management/commands/parsemail.py| 31 - > > > -- > > > 2 files changed, 27 insertions(+), 13 deletions(-) > > > > > > diff --git a/patchwork/management/commands/parsearchive.py > > > b/patchwork/management/commands/parsearchive.py > > > index a3c8360186c8..3aab58a45bcd 100644 > > > --- a/patchwork/management/commands/parsearchive.py > > > +++ b/patchwork/management/commands/parsearchive.py > > > @@ -77,6 +77,15 @@ class Command(BaseCommand): > > > > > > count = len(mbox) > > > > > > +# detect broken mails in the mbox > > > +# see earlyfail fuzz test on py3 > > > +try: > > > +for m in mbox: > > > +pass > > > +except AttributeError: > > > +logger.warning('Broken mbox/Maildir, aborting') > > > +return > > > + > > > > This seems a bit non-obvious and could do with a little bit of explanation? > > The message, or the code structure? I structured the code this way > rather than the more obvious > try: > mbox = [m for m in mbox] > ... > because the more obvious way requires loading the entire mbox/maildir > into memory and I was a bit worried about the memory consumption of that > when parsing a large mbox. > > I agree a more helpful comment would have been in order. Stephen, do you > want a v2 of this patch by itself? I can resend the series but it seems > a bit excessive... Or I could do a follow-up. Yeah, let's just do a follow-up. I'm going to go ahead and merge the rest of these. Stephen ___ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork
Re: [PATCH 08/10] parse(mail|archive): handle early fail within email module
Andrew Donnellanwrites: > On 28/06/17 17:48, Daniel Axtens wrote: >> Certain really messed up email messages can cause a failure within >> the email module (at least on py3). Catch this. >> >> Signed-off-by: Daniel Axtens >> --- >> patchwork/management/commands/parsearchive.py | 9 >> patchwork/management/commands/parsemail.py| 31 >> --- >> 2 files changed, 27 insertions(+), 13 deletions(-) >> >> diff --git a/patchwork/management/commands/parsearchive.py >> b/patchwork/management/commands/parsearchive.py >> index a3c8360186c8..3aab58a45bcd 100644 >> --- a/patchwork/management/commands/parsearchive.py >> +++ b/patchwork/management/commands/parsearchive.py >> @@ -77,6 +77,15 @@ class Command(BaseCommand): >> >> count = len(mbox) >> >> +# detect broken mails in the mbox >> +# see earlyfail fuzz test on py3 >> +try: >> +for m in mbox: >> +pass >> +except AttributeError: >> +logger.warning('Broken mbox/Maildir, aborting') >> +return >> + > > This seems a bit non-obvious and could do with a little bit of explanation? The message, or the code structure? I structured the code this way rather than the more obvious try: mbox = [m for m in mbox] ... because the more obvious way requires loading the entire mbox/maildir into memory and I was a bit worried about the memory consumption of that when parsing a large mbox. I agree a more helpful comment would have been in order. Stephen, do you want a v2 of this patch by itself? I can resend the series but it seems a bit excessive... Or I could do a follow-up. Regards, Daniel > >> logger.info('Parsing %d mails', count) >> for i, msg in enumerate(mbox): >> try: >> diff --git a/patchwork/management/commands/parsemail.py >> b/patchwork/management/commands/parsemail.py >> index 9adfb25b09e3..52ec8bc56899 100644 >> --- a/patchwork/management/commands/parsemail.py >> +++ b/patchwork/management/commands/parsemail.py >> @@ -58,20 +58,25 @@ class Command(base.BaseCommand): >> def handle(self, *args, **options): >> infile = args[0] if args else options['infile'] >> >> -if infile: >> -logger.info('Parsing mail loaded by filename') >> -if six.PY3: >> -with open(infile, 'rb') as file_: >> -mail = email.message_from_binary_file(file_) >> -else: >> -with open(infile) as file_: >> -mail = email.message_from_file(file_) >> -else: >> -logger.info('Parsing mail loaded from stdin') >> -if six.PY3: >> -mail = email.message_from_binary_file(sys.stdin.buffer) >> +try: >> +if infile: >> +logger.info('Parsing mail loaded by filename') >> +if six.PY3: >> +with open(infile, 'rb') as file_: >> +mail = email.message_from_binary_file(file_) >> +else: >> +with open(infile) as file_: >> +mail = email.message_from_file(file_) >> else: >> -mail = email.message_from_file(sys.stdin) >> +logger.info('Parsing mail loaded from stdin') >> +if six.PY3: >> +mail = email.message_from_binary_file(sys.stdin.buffer) >> +else: >> +mail = email.message_from_file(sys.stdin) >> +except AttributeError: >> +logger.warning("Broken email ignored") >> +return >> + >> try: >> result = parse_mail(mail, options['list_id']) >> if result: >> > > -- > Andrew Donnellan OzLabs, ADL Canberra > andrew.donnel...@au1.ibm.com IBM Australia Limited ___ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork
Re: [PATCH 08/10] parse(mail|archive): handle early fail within email module
On 28/06/17 17:48, Daniel Axtens wrote: Certain really messed up email messages can cause a failure within the email module (at least on py3). Catch this. Signed-off-by: Daniel Axtens--- patchwork/management/commands/parsearchive.py | 9 patchwork/management/commands/parsemail.py| 31 --- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index a3c8360186c8..3aab58a45bcd 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -77,6 +77,15 @@ class Command(BaseCommand): count = len(mbox) +# detect broken mails in the mbox +# see earlyfail fuzz test on py3 +try: +for m in mbox: +pass +except AttributeError: +logger.warning('Broken mbox/Maildir, aborting') +return + This seems a bit non-obvious and could do with a little bit of explanation? logger.info('Parsing %d mails', count) for i, msg in enumerate(mbox): try: diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index 9adfb25b09e3..52ec8bc56899 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -58,20 +58,25 @@ class Command(base.BaseCommand): def handle(self, *args, **options): infile = args[0] if args else options['infile'] -if infile: -logger.info('Parsing mail loaded by filename') -if six.PY3: -with open(infile, 'rb') as file_: -mail = email.message_from_binary_file(file_) -else: -with open(infile) as file_: -mail = email.message_from_file(file_) -else: -logger.info('Parsing mail loaded from stdin') -if six.PY3: -mail = email.message_from_binary_file(sys.stdin.buffer) +try: +if infile: +logger.info('Parsing mail loaded by filename') +if six.PY3: +with open(infile, 'rb') as file_: +mail = email.message_from_binary_file(file_) +else: +with open(infile) as file_: +mail = email.message_from_file(file_) else: -mail = email.message_from_file(sys.stdin) +logger.info('Parsing mail loaded from stdin') +if six.PY3: +mail = email.message_from_binary_file(sys.stdin.buffer) +else: +mail = email.message_from_file(sys.stdin) +except AttributeError: +logger.warning("Broken email ignored") +return + try: result = parse_mail(mail, options['list_id']) if result: -- Andrew Donnellan OzLabs, ADL Canberra andrew.donnel...@au1.ibm.com IBM Australia Limited ___ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork
[PATCH 08/10] parse(mail|archive): handle early fail within email module
Certain really messed up email messages can cause a failure within the email module (at least on py3). Catch this. Signed-off-by: Daniel Axtens--- patchwork/management/commands/parsearchive.py | 9 patchwork/management/commands/parsemail.py| 31 --- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py index a3c8360186c8..3aab58a45bcd 100644 --- a/patchwork/management/commands/parsearchive.py +++ b/patchwork/management/commands/parsearchive.py @@ -77,6 +77,15 @@ class Command(BaseCommand): count = len(mbox) +# detect broken mails in the mbox +# see earlyfail fuzz test on py3 +try: +for m in mbox: +pass +except AttributeError: +logger.warning('Broken mbox/Maildir, aborting') +return + logger.info('Parsing %d mails', count) for i, msg in enumerate(mbox): try: diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py index 9adfb25b09e3..52ec8bc56899 100644 --- a/patchwork/management/commands/parsemail.py +++ b/patchwork/management/commands/parsemail.py @@ -58,20 +58,25 @@ class Command(base.BaseCommand): def handle(self, *args, **options): infile = args[0] if args else options['infile'] -if infile: -logger.info('Parsing mail loaded by filename') -if six.PY3: -with open(infile, 'rb') as file_: -mail = email.message_from_binary_file(file_) -else: -with open(infile) as file_: -mail = email.message_from_file(file_) -else: -logger.info('Parsing mail loaded from stdin') -if six.PY3: -mail = email.message_from_binary_file(sys.stdin.buffer) +try: +if infile: +logger.info('Parsing mail loaded by filename') +if six.PY3: +with open(infile, 'rb') as file_: +mail = email.message_from_binary_file(file_) +else: +with open(infile) as file_: +mail = email.message_from_file(file_) else: -mail = email.message_from_file(sys.stdin) +logger.info('Parsing mail loaded from stdin') +if six.PY3: +mail = email.message_from_binary_file(sys.stdin.buffer) +else: +mail = email.message_from_file(sys.stdin) +except AttributeError: +logger.warning("Broken email ignored") +return + try: result = parse_mail(mail, options['list_id']) if result: -- 2.11.0 ___ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork