------------------------------------------------------------ revno: 6522 committer: Barry Warsaw <[EMAIL PROTECTED]> branch nick: 3.0 timestamp: Wed 2007-07-04 00:16:48 -0400 message: Convert the TestTagger to a doctest. No other cleaning up of the handler module is done. added: Mailman/docs/tagger.txt modified: Mailman/testing/test_handlers.py
=== added file 'Mailman/docs/tagger.txt' --- a/Mailman/docs/tagger.txt 1970-01-01 00:00:00 +0000 +++ b/Mailman/docs/tagger.txt 2007-07-04 04:16:48 +0000 @@ -0,0 +1,244 @@ +Message tagger +============== + +Mailman has a topics system which works like this: a mailing list +administrator sets up one or more topics, each of which is essentially a named regular +expression. The topic name can be any arbitrary string, and the name serves +double duty as the 'topic tag'. Each message that flows through the mailing list has +its Subject: and Keywords: headers compared against these regular +expressions. The message then gets tagged with the topic names of each hit. + + >>> from Mailman.Handlers.Tagger import process + >>> from Mailman.Message import Message + >>> from Mailman.Queue.Switchboard import Switchboard + >>> from Mailman.configuration import config + >>> from Mailman.database import flush + >>> from email import message_from_string + >>> mlist = config.list_manager.create('[EMAIL PROTECTED]') + +Topics must be enabled for Mailman to do any topic matching, even if topics +are defined. + + >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)] + >>> mlist.topics_enabled = False + >>> mlist.topics_bodylines_limit = 0 + >>> flush() + + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + <BLANKLINE> + <BLANKLINE> + >>> msgdata + {} + +However, once topics are enabled, messages will be tagged. There are two +artifacts of tagging; an X-Topics: header is added with the topic name, and +the message metadata gets a key with a list of matching topic names. + + >>> mlist.topics_enabled = True + >>> flush() + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... 
""", Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + X-Topics: bar fight + <BLANKLINE> + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning body lines +------------------- + +The tagger can also look at a certain number of body lines, but only for +Subject: and Keywords: header-like lines. When set to zero, no body lines are +scanned. + + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Subject: nothing + Keywords: at all + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata + {} + +But let the tagger scan a few body lines and the matching headers will be +found. + + >>> mlist.topics_bodylines_limit = 5 + >>> flush() + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Subject: nothing + Keywords: at all + X-Topics: bar fight + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +However, scanning stops at the first body line that doesn't look like a +header. + + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Subject: nothing + ... Keywords: at all + ... + ... This is not a header + ... Subject: foobar + ... Keywords: barbaz + ... 
""", Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: [EMAIL PROTECTED] + Subject: nothing + Keywords: at all + <BLANKLINE> + This is not a header + Subject: foobar + Keywords: barbaz + >>> msgdata + {} + +When set to a negative number, all body lines will be scanned. + + >>> mlist.topics_bodylines_limit = -1 + >>> flush() + >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100) + >>> msg = message_from_string("""\ + ... From: [EMAIL PROTECTED] + ... Subject: nothing + ... Keywords: at all + ... + ... %s + ... Subject: foobar + ... Keywords: barbaz + ... """ % lots_of_headers, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> # Rather than print out 100 X-Ignore: headers, let's just prove that + >>> # the X-Topics: header exists, meaning that the tagger did its job. + >>> msg['x-topics'] + 'bar fight' + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning sub-parts +------------------ + +The tagger will also scan the body lines of text subparts in a multipart +message, using the same rules as if all those body lines lived in a single +text payload. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Was + Keywords: Raw + Content-Type: multipart/alternative; boundary="BOUNDARY" + X-Topics: bar fight + <BLANKLINE> + --BOUNDARY + From: sabo + To: obas + <BLANKLINE> + Subject: farbaw + Keywords: barbaz + <BLANKLINE> + --BOUNDARY-- + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +But the tagger will not descend into non-text parts. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... 
Content-Type: multipart/alternative; boundary=BOUNDARY + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg['x-topics'] + None + >>> msgdata + {} === modified file 'Mailman/testing/test_handlers.py' --- a/Mailman/testing/test_handlers.py 2007-07-03 05:09:53 +0000 +++ b/Mailman/testing/test_handlers.py 2007-07-04 04:16:48 +0000 @@ -44,7 +44,6 @@ from Mailman.Handlers import Scrubber # Don't test handlers such as SMTPDirect and Sendmail here from Mailman.Handlers import SpamDetect -from Mailman.Handlers import Tagger from Mailman.Handlers import ToArchive from Mailman.Handlers import ToDigest from Mailman.Handlers import ToOutgoing @@ -434,121 +433,6 @@ -class TestTagger(TestBase): - def setUp(self): - TestBase.setUp(self) - self._mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', 1)] - self._mlist.topics_enabled = 1 - - def test_short_circuit(self): - self._mlist.topics_enabled = 0 - rtn = Tagger.process(self._mlist, None, {}) - # Not really a great test, but there's little else to assert - self.assertEqual(rtn, None) - - def test_simple(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = 0 - msg = email.message_from_string("""\ -Subject: foobar -Keywords: barbaz - -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_all_body_lines_plain_text(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw - -Subject: farbaw -Keywords: barbaz -""") - msgdata = {} - Tagger.process(mlist, msg, 
msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_no_body_lines(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = 0 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw - -Subject: farbaw -Keywords: barbaz -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], None) - eq(msgdata.get('topichits'), None) - - def test_body_lines_in_multipart(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw -Content-Type: multipart/alternative; boundary="BOUNDARY" - ---BOUNDARY -From: sabo -To: obas - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY-- -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_body_lines_no_part(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw -Content-Type: multipart/alternative; boundary=BOUNDARY - ---BOUNDARY -From: sabo -To: obas -Content-Type: message/rfc822 - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY -From: sabo -To: obas -Content-Type: message/rfc822 - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY-- -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], None) - eq(msgdata.get('topichits'), None) - - - class TestToArchive(TestBase): def setUp(self): TestBase.setUp(self) @@ -813,7 +697,6 @@ suite.addTest(unittest.makeSuite(TestModerate)) suite.addTest(unittest.makeSuite(TestScrubber)) suite.addTest(unittest.makeSuite(TestSpamDetect)) - suite.addTest(unittest.makeSuite(TestTagger)) suite.addTest(unittest.makeSuite(TestToArchive)) suite.addTest(unittest.makeSuite(TestToDigest)) suite.addTest(unittest.makeSuite(TestToOutgoing)) -- (no title) 
https://code.launchpad.net/~mailman-coders/mailman/3.0 You are receiving this branch notification because you are subscribed to it. To unsubscribe from this branch go to https://code.launchpad.net/~mailman-coders/mailman/3.0/+subscription/mailman-checkins. _______________________________________________ Mailman-checkins mailing list Mailman-checkins@python.org Unsubscribe: http://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org