------------------------------------------------------------ revno: 6524 committer: Barry Warsaw <[EMAIL PROTECTED]> branch nick: 3.0 timestamp: Thu 2007-07-05 23:01:23 -0400 message: Convert MimeDel tests to doctest. No other changes to the handler module are made. added: Mailman/docs/filtering.txt modified: Mailman/testing/test_handlers.py
=== added file 'Mailman/docs/filtering.txt' --- a/Mailman/docs/filtering.txt 1970-01-01 00:00:00 +0000 +++ b/Mailman/docs/filtering.txt 2007-07-06 03:01:23 +0000 @@ -0,0 +1,353 @@ +Content filtering +================= + +Mailman can filter the content of messages posted to a mailing list by +stripping MIME subparts, and possibly reorganizing the MIME structure of a +message. It does this with the MimeDel handler module, although other +handlers can potentially do other kinds of finer level content filtering. + + >>> from Mailman.Handlers.MimeDel import process + >>> from Mailman.Message import Message + >>> from Mailman.configuration import config + >>> from Mailman.database import flush + >>> from email import message_from_string + >>> mlist = config.list_manager.create('[EMAIL PROTECTED]') + >>> mlist.preferred_language = 'en' + >>> flush() + +Several mailing list options control content filtering. First, the feature +must be enabled, then there are two options that control which MIME types get +filtered and which get passed. Finally, there is an option to control whether +text/html parts will get converted to plain text. Let's set up some defaults +for these variables, then we'll explain them in more detail below. + + >>> mlist.filter_content = True + >>> mlist.filter_mime_types = [] + >>> mlist.pass_mime_types = [] + >>> mlist.convert_html_to_plaintext = False + >>> flush() + + +Filtering the outer content type +-------------------------------- + +A simple filtering setting will just search the content types of the message's +parts, discarding all parts with a matching MIME type. If the message's outer +content type matches the filter, the entire message will be discarded. + + >>> mlist.filter_mime_types = ['image/jpeg'] + >>> # XXX Change this to an enum + >>> mlist.filter_action = 0 # Discard + >>> flush() + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxxxx + ... 
""", Message) + >>> process(mlist, msg, {}) + Traceback (most recent call last): + ... + DiscardMessage + +However, if we turn off content filtering altogether, then the handler +short-circuits. + + >>> mlist.filter_content = False + >>> flush() + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {} + +Similarly, no content filtering is performed on digest messages, which are +crafted internally by Mailman. + + >>> mlist.filter_content = True + >>> flush() + >>> msgdata = {'isdigest': True} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {'isdigest': True} + + +Simple multipart filtering +-------------------------- + +If one of the subparts in a multipart message matches the filter type, then +just that subpart will be stripped. + + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUNDARY + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUNDARY-- + ... """, Message) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + + +Collapsing multipart/alternative messages +----------------------------------------- + +When content filtering encounters a multipart/alternative part, and the +results of filtering leave only one of the subparts, then the +multipart/alternative may be collapsed. 
For example, in the following +message, the outer content type is a multipart/mixed. Inside this part is +just a single subpart that has a content type of multipart/alternative. This +inner multipart has two subparts, a jpeg and a gif. + +Content filtering will remove the jpeg part, leaving the multipart/alternative +with only a single gif subpart. Because there's only one subpart left, the +MIME structure of the message will be reorganized, removing the inner +multipart/alternative so that the outer multipart/mixed has just a single gif +subpart. + + >>> mlist.collapse_alternatives = True + >>> flush() + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: multipart/alternative; boundary=BOUND2 + ... MIME-Version: 1.0 + ... + ... --BOUND2 + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUND2 + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUND2-- + ... + ... --BOUNDARY-- + ... """, Message) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + +When the outer part is a multipart/alternative and filtering leaves this outer +part with just one subpart, the entire message is converted to the left over +part's content type. In other words, the left over inner part is promoted to +being the outer part. + + >>> mlist.filter_mime_types.append('text/html') + >>> flush() + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: multipart/alternative; boundary=AAA + ... + ... --AAA + ... Content-Type: text/html + ... + ... <b>This is some html</b> + ... --AAA + ... 
Content-Type: text/plain + ... + ... This is plain text + ... --AAA-- + ... """, Message) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + This is plain text + +Clean up. + + >>> ignore = mlist.filter_mime_types.pop() + >>> flush() + + +Conversion to plain text +------------------------ + +Many mailing lists prohibit HTML email, and in fact, such email can be a +phishing or spam vector. However, many mail readers will send HTML email by +default because users think it looks pretty. One approach to handling this +would be to filter out text/html parts and rely on multipart/alternative +collapsing to leave just a plain text part. This works because many mail +readers that send HTML email actually send a plain text part in the second +subpart of such multipart/alternatives. + +While this is a good suggestion for plain text-only mailing lists, often a +mail reader will send only a text/html part with no plain text alternative. +In this case, the site administrator can enable text/html to text/plain +conversion by defining a conversion command. A list administrator still needs +to enable such conversion for their list though. + + >>> mlist.convert_html_to_plaintext = True + >>> flush() + +By default, Mailman sends the message through lynx, but since this program is +not guaranteed to exist, we'll craft a simple, but stupid script to simulate +the conversion process. The script expects a single argument, which is the +name of the file containing the message payload to filter. + + >>> import os, sys + >>> script_path = os.path.join(config.DATA_DIR, 'filter.py') + >>> fp = open(script_path, 'w') + >>> try: + ... print >> fp, """\ + ... import sys + ... print 'Converted text/html to text/plain' + ... print 'Filename:', sys.argv[1] + ... """ + ... finally: + ... fp.close() + >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % ( + ... 
sys.executable, script_path) + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: text/html + ... MIME-Version: 1.0 + ... + ... <html><head></head> + ... <body></body></html> + ... """, Message) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + MIME-Version: 1.0 + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + Converted text/html to text/plain + Filename: ... + <BLANKLINE> + + +Discarding empty parts +---------------------- + +Similarly, if after filtering a multipart section ends up empty, then the +entire multipart is discarded. For example, here's a message where an inner +multipart/mixed contains two jpeg subparts. Both jpegs are filtered out, so +the entire inner multipart/mixed is discarded. + + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Content-Type: multipart/mixed; boundary=AAA + ... + ... --AAA + ... Content-Type: multipart/mixed; boundary=BBB + ... + ... --BBB + ... Content-Type: image/jpeg + ... + ... xxx + ... --BBB + ... Content-Type: image/jpeg + ... + ... yyy + ... --BBB--- + ... --AAA + ... Content-Type: multipart/alternative; boundary=CCC + ... + ... --CCC + ... Content-Type: text/html + ... + ... <h2>This is a header</h2> + ... + ... --CCC + ... Content-Type: text/plain + ... + ... A different message + ... --CCC-- + ... --AAA + ... Content-Type: image/gif + ... + ... zzz + ... --AAA + ... Content-Type: image/gif + ... + ... aaa + ... --AAA-- + ... """, Message) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Content-Type: multipart/mixed; boundary=AAA + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --AAA + MIME-Version: 1.0 + Content-Type: text/plain + <BLANKLINE> + Converted text/html to text/plain + Filename: ... 
+ <BLANKLINE> + --AAA + Content-Type: image/gif + <BLANKLINE> + zzz + --AAA + Content-Type: image/gif + <BLANKLINE> + aaa + --AAA-- + <BLANKLINE> + + +Passing MIME types +------------------ + +XXX Describe the pass_mime_types setting and how it interacts with +filter_mime_types. === modified file 'Mailman/testing/test_handlers.py' --- a/Mailman/testing/test_handlers.py 2007-07-05 14:29:40 +0000 +++ b/Mailman/testing/test_handlers.py 2007-07-06 03:01:23 +0000 @@ -39,7 +39,6 @@ from Mailman.Handlers import Acknowledge from Mailman.Handlers import AfterDelivery from Mailman.Handlers import Approve -from Mailman.Handlers import MimeDel from Mailman.Handlers import Moderate from Mailman.Handlers import Scrubber # Don't test handlers such as SMTPDirect and Sendmail here @@ -131,180 +130,6 @@ -class TestMimeDel(TestBase): - def setUp(self): - TestBase.setUp(self) - self._mlist.filter_content = 1 - self._mlist.filter_mime_types = ['image/jpeg'] - self._mlist.pass_mime_types = [] - self._mlist.convert_html_to_plaintext = 1 - - def test_outer_matches(self): - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: image/jpeg -MIME-Version: 1.0 - -xxxxx -""") - self.assertRaises(Errors.DiscardMessage, MimeDel.process, - self._mlist, msg, {}) - - def test_strain_multipart(self): - eq = self.assertEqual - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: multipart/mixed; boundary=BOUNDARY -MIME-Version: 1.0 - ---BOUNDARY -Content-Type: image/jpeg -MIME-Version: 1.0 - -xxx - ---BOUNDARY -Content-Type: image/gif -MIME-Version: 1.0 - -yyy ---BOUNDARY-- -""") - MimeDel.process(self._mlist, msg, {}) - eq(len(msg.get_payload()), 1) - subpart = msg.get_payload(0) - eq(subpart.get_content_type(), 'image/gif') - eq(subpart.get_payload(), 'yyy') - - def test_collapse_multipart_alternative(self): - eq = self.assertEqual - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: multipart/mixed; boundary=BOUNDARY 
-MIME-Version: 1.0 - ---BOUNDARY -Content-Type: multipart/alternative; boundary=BOUND2 -MIME-Version: 1.0 - ---BOUND2 -Content-Type: image/jpeg -MIME-Version: 1.0 - -xxx - ---BOUND2 -Content-Type: image/gif -MIME-Version: 1.0 - -yyy ---BOUND2-- - ---BOUNDARY-- -""") - MimeDel.process(self._mlist, msg, {}) - eq(len(msg.get_payload()), 1) - eq(msg.get_content_type(), 'multipart/mixed') - subpart = msg.get_payload(0) - eq(subpart.get_content_type(), 'image/gif') - eq(subpart.get_payload(), 'yyy') - - def test_convert_to_plaintext(self): - eq = self.assertEqual - # XXX Skip this test if the html->text converter program is not - # available. - program = config.HTML_TO_PLAIN_TEXT_COMMAND.split()[0] - if os.path.isfile(program): - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: text/html -MIME-Version: 1.0 - -<html><head></head> -<body></body></html> -""") - MimeDel.process(self._mlist, msg, {}) - eq(msg.get_content_type(), 'text/plain') - eq(msg.get_payload(), '\n\n\n') - - def test_deep_structure(self): - eq = self.assertEqual - self._mlist.filter_mime_types.append('text/html') - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: multipart/mixed; boundary=AAA - ---AAA -Content-Type: multipart/mixed; boundary=BBB - ---BBB -Content-Type: image/jpeg - -xxx ---BBB -Content-Type: image/jpeg - -yyy ---BBB--- ---AAA -Content-Type: multipart/alternative; boundary=CCC - ---CCC -Content-Type: text/html - -<h2>This is a header</h2> - ---CCC -Content-Type: text/plain - -A different message ---CCC-- ---AAA -Content-Type: image/gif - -zzz ---AAA -Content-Type: image/gif - -aaa ---AAA-- -""") - MimeDel.process(self._mlist, msg, {}) - payload = msg.get_payload() - eq(len(payload), 3) - part1 = msg.get_payload(0) - eq(part1.get_content_type(), 'text/plain') - eq(part1.get_payload(), 'A different message') - part2 = msg.get_payload(1) - eq(part2.get_content_type(), 'image/gif') - eq(part2.get_payload(), 'zzz') - part3 = 
msg.get_payload(2) - eq(part3.get_content_type(), 'image/gif') - eq(part3.get_payload(), 'aaa') - - def test_top_multipart_alternative(self): - eq = self.assertEqual - self._mlist.filter_mime_types.append('text/html') - msg = email.message_from_string("""\ -From: [EMAIL PROTECTED] -Content-Type: multipart/alternative; boundary=AAA - ---AAA -Content-Type: text/html - -<b>This is some html</b> ---AAA -Content-Type: text/plain - -This is plain text ---AAA-- -""") - MimeDel.process(self._mlist, msg, {}) - eq(msg.get_content_type(), 'text/plain') - eq(msg.get_payload(), 'This is plain text') - - - class TestScrubber(TestBase): def test_save_attachment(self): mlist = self._mlist @@ -657,7 +482,6 @@ def test_suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(TestApprove)) - suite.addTest(unittest.makeSuite(TestMimeDel)) suite.addTest(unittest.makeSuite(TestScrubber)) suite.addTest(unittest.makeSuite(TestToArchive)) suite.addTest(unittest.makeSuite(TestToDigest)) -- (no title) https://code.launchpad.net/~mailman-coders/mailman/3.0 You are receiving this branch notification because you are subscribed to it. To unsubscribe from this branch go to https://code.launchpad.net/~mailman-coders/mailman/3.0/+subscription/mailman-checkins. _______________________________________________ Mailman-checkins mailing list Mailman-checkins@python.org Unsubscribe: http://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org