Barry Warsaw pushed to branch master at mailman / Mailman
Commits: 6a229576 by Aurélien Bompard at 2015-10-20T21:10:34Z Handle header-match rule-specific action - - - - - ece55099 by Aurélien Bompard at 2015-10-20T21:10:34Z Convert header_filter_rules from 2.1 to header_matches - - - - - 741fdc63 by Aurélien Bompard at 2015-10-20T21:10:34Z Use a separate table for header_matches - - - - - 583a7639 by Aurélien Bompard at 2015-10-20T21:10:34Z Rename from plural to singular - - - - - ed772e4f by Aurélien Bompard at 2015-10-20T21:10:34Z Adapt the code and the tests to the new HeaderMatch object - - - - - d468d096 by Aurélien Bompard at 2015-10-20T21:10:35Z Make sure site-wide header_matches take precedence over list-specific ones - - - - - b9baf43a by Aurélien Bompard at 2015-10-20T21:10:35Z Implement changes from the review - - - - - e2f6b111 by Aurélien Bompard at 2015-10-20T21:10:35Z Fix a code typo in the docs - - - - - 87f2f50b by Aurélien Bompard at 2015-10-20T21:10:35Z Use and interface for the set of header_matches - - - - - 0fcb68ac by Aurélien Bompard at 2015-10-20T21:10:35Z Test schema migration for the header matches - - - - - adb9e316 by Aurélien Bompard at 2015-10-20T21:10:35Z Be compatible with older versions of SQLAlchemy and Alembic - - - - - 5f917c8a by Barry Warsaw at 2015-10-20T22:48:59Z Clean up pass through abompard's branch. - - - - - 5104e712 by Barry Warsaw at 2015-10-20T22:51:37Z Add NEWS. - - - - - 724b7cee by Barry Warsaw at 2015-10-20T22:58:33Z Mailing lists can now have their own header matching rules, although site-defined rules still take precedence. Importing a Mailman 2.1 list with header matching rules defined will create them in Mailman 3, albeit with a few unsupported corner cases. Definition of new header matching rules is not yet exposed through the REST API. Given by Aurélien Bompard. Code cleaning pass by Barry Warsaw. Closes !42 - - - - - 12 changed files: - src/mailman/chains/headers.py - src/mailman/chains/tests/test_headers.py - src/mailman/config/configure.zcml - + src/mailman/database/alembic/versions/42756496720_header_matches.py - src/mailman/database/tests/test_migrations.py - src/mailman/docs/NEWS.rst - src/mailman/interfaces/mailinglist.py - src/mailman/model/mailinglist.py - src/mailman/model/tests/test_mailinglist.py - src/mailman/rules/docs/header-matching.rst - src/mailman/utilities/importer.py - src/mailman/utilities/tests/test_import.py Changes: ===================================== src/mailman/chains/headers.py ===================================== --- a/src/mailman/chains/headers.py +++ b/src/mailman/chains/headers.py @@ -37,22 +37,29 @@ log = logging.getLogger('mailman.error') -def make_link(header, pattern): +def make_link(header, pattern, chain=None): """Create a Link object. - The link action is always to defer, since at the end of all the header - checks, we'll jump to the chain defined in the configuration file, should - any of them have matched. + The link action is to defer by default, since at the end of all the + header checks, we'll jump to the chain defined in the configuration + file, should any of them have matched. However, it is possible to + create a link which jumps to a specific chain. :param header: The email header name to check, e.g. X-Spam. :type header: string :param pattern: A regular expression for matching the header value. :type pattern: string + :param chain: When given, this is the chain to jump to if the + pattern matches the header. + :type chain: string :return: The link representing this rule check. :rtype: `ILink` """ rule = HeaderMatchRule(header, pattern) - return Link(rule, LinkAction.defer) + if chain is None: + return Link(rule) + chain = config.chains[chain] + return Link(rule, LinkAction.jump, chain) @@ -132,17 +139,18 @@ class HeaderMatchChain(Chain): parts = line.split(':', 1) if len(parts) != 2: log.error('Configuration error: [antispam]header_checks ' - 'contains bogus line: {0}'.format(line)) + 'contains bogus line: {}'.format(line)) continue yield make_link(parts[0], parts[1].lstrip()) - # Then return all the list-specific header matches. - # Python 3.3: Use 'yield from' - for entry in mlist.header_matches: - yield make_link(*entry) # Then return all the explicitly added links. - for link in self._extended_links: - yield link - # Finally, if any of the above rules matched, jump to the chain - # defined in the configuration file. - yield Link(config.rules['any'], LinkAction.jump, + yield from self._extended_links + # If any of the above rules matched, they will have deferred their + # action until now, so jump to the chain defined in the configuration + # file. For security considerations, this takes precedence over + # list-specific matches. + yield Link(config.rules['any'], + LinkAction.jump, config.chains[config.antispam.jump_chain]) + # Then return all the list-specific header matches. + for entry in mlist.header_matches: + yield make_link(entry.header, entry.pattern, entry.chain) ===================================== src/mailman/chains/tests/test_headers.py ===================================== --- a/src/mailman/chains/tests/test_headers.py +++ b/src/mailman/chains/tests/test_headers.py @@ -25,12 +25,16 @@ __all__ = [ import unittest from mailman.app.lifecycle import create_list -from mailman.chains.headers import HeaderMatchRule +from mailman.chains.headers import HeaderMatchRule, make_link from mailman.config import config +from mailman.core.chains import process from mailman.email.message import Message -from mailman.interfaces.chain import LinkAction +from mailman.interfaces.chain import LinkAction, HoldEvent +from mailman.interfaces.mailinglist import IHeaderMatchSet +from mailman.testing.helpers import ( + LogFileMark, configuration, event_subscribers, + specialized_message_from_string as mfs) from mailman.testing.layers import ConfigLayer -from mailman.testing.helpers import LogFileMark, configuration @@ -42,6 +46,24 @@ class TestHeaderChain(unittest.TestCase): def setUp(self): self._mlist = create_list('t...@example.com') + def test_make_link(self): + # Test that make_link() with no given chain creates a Link with a + # deferred link action. + link = make_link('Subject', '[tT]esting') + self.assertEqual(link.rule.header, 'Subject') + self.assertEqual(link.rule.pattern, '[tT]esting') + self.assertEqual(link.action, LinkAction.defer) + self.assertIsNone(link.chain) + + def test_make_link_with_chain(self): + # Test that make_link() with a given chain creates a Link with a jump + # action to the chain. + link = make_link('Subject', '[tT]esting', 'accept') + self.assertEqual(link.rule.header, 'Subject') + self.assertEqual(link.rule.pattern, '[tT]esting') + self.assertEqual(link.action, LinkAction.jump) + self.assertEqual(link.chain, config.chains['accept']) + @configuration('antispam', header_checks=""" Foo: a+ Bar: bb? @@ -119,3 +141,66 @@ class TestHeaderChain(unittest.TestCase): HeaderMatchRule, 'x-spam-score', '.*') finally: config.rules = saved_rules + + def test_list_rule(self): + # Test that the header-match chain has the header checks from the + # mailing-list configuration. + chain = config.chains['header-match'] + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Foo', 'a+') + links = [link for link in chain.get_links(self._mlist, Message(), {}) + if link.rule.name != 'any'] + self.assertEqual(len(links), 1) + self.assertEqual(links[0].action, LinkAction.defer) + self.assertEqual(links[0].rule.header, 'foo') + self.assertEqual(links[0].rule.pattern, 'a+') + + def test_list_complex_rule(self): + # Test that the mailing-list header-match complex rules are read + # properly. + chain = config.chains['header-match'] + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Foo', 'a+', 'reject') + header_matches.add('Bar', 'b+', 'discard') + header_matches.add('Baz', 'z+', 'accept') + links = [link for link in chain.get_links(self._mlist, Message(), {}) + if link.rule.name != 'any'] + self.assertEqual(len(links), 3) + self.assertEqual([ + (link.rule.header, link.rule.pattern, link.action, link.chain.name) + for link in links + ], + [('foo', 'a+', LinkAction.jump, 'reject'), + ('bar', 'b+', LinkAction.jump, 'discard'), + ('baz', 'z+', LinkAction.jump, 'accept'), + ]) + + @configuration('antispam', header_checks=""" + Foo: foo + """, jump_chain='hold') + def test_priority_site_over_list(self): + # Test that the site-wide checks take precedence over the list-specific + # checks. + msg = mfs("""\ +From: a...@example.com +To: t...@example.com +Subject: A message +Message-ID: <ant> +Foo: foo +MIME-Version: 1.0 + +A message body. +""") + msgdata = {} + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Foo', 'foo', 'accept') + # This event subscriber records the event that occurs when the message + # is processed by the owner chain. + events = [] + with event_subscribers(events.append): + process(self._mlist, msg, msgdata, start_chain='header-match') + self.assertEqual(len(events), 1) + event = events[0] + # Site-wide wants to hold the message, the list wants to accept it. + self.assertTrue(isinstance(event, HoldEvent)) + self.assertEqual(event.chain, config.chains['hold']) ===================================== src/mailman/config/configure.zcml ===================================== --- a/src/mailman/config/configure.zcml +++ b/src/mailman/config/configure.zcml @@ -36,6 +36,12 @@ <adapter for="mailman.interfaces.mailinglist.IMailingList" + provides="mailman.interfaces.mailinglist.IHeaderMatchSet" + factory="mailman.model.mailinglist.HeaderMatchSet" + /> + + <adapter + for="mailman.interfaces.mailinglist.IMailingList" provides="mailman.interfaces.requests.IListRequests" factory="mailman.model.requests.ListRequests" /> ===================================== src/mailman/database/alembic/versions/42756496720_header_matches.py ===================================== --- /dev/null +++ b/src/mailman/database/alembic/versions/42756496720_header_matches.py @@ -0,0 +1,89 @@ +"""header_matches + +Revision ID: 42756496720 +Revises: 2bb9b382198 +Create Date: 2015-09-11 10:11:38.310315 + +""" + +# revision identifiers, used by Alembic. +revision = '42756496720' +down_revision = '2bb9b382198' + +import sqlalchemy as sa + +from alembic import op +from mailman.database.helpers import is_sqlite, exists_in_db + + +def upgrade(): + # Create the new table + header_match_table = op.create_table( + 'headermatch', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('mailing_list_id', sa.Integer(), nullable=True), + sa.Column('header', sa.Unicode(), nullable=False), + sa.Column('pattern', sa.Unicode(), nullable=False), + sa.Column('chain', sa.Unicode(), nullable=True), + sa.ForeignKeyConstraint(['mailing_list_id'], ['mailinglist.id'], ), + sa.PrimaryKeyConstraint('id') + ) + # Now migrate the data. It can't be offline because we need to read the + # pickles. + connection = op.get_bind() + # Don't import the table definition from the models, it may break this + # migration when the model is updated in the future (see the Alembic doc). + mlist_table = sa.sql.table( + 'mailinglist', + sa.sql.column('id', sa.Integer), + sa.sql.column('header_matches', sa.PickleType) + ) + for mlist_id, old_matches in connection.execute(mlist_table.select()): + for old_match in old_matches: + connection.execute(header_match_table.insert().values( + mailing_list_id=mlist_id, + header=old_match[0], + pattern=old_match[1], + chain=None + )) + # Now that data is migrated, drop the old column (except on SQLite which + # does not support this) + if not is_sqlite(connection): + op.drop_column('mailinglist', 'header_matches') + + +def downgrade(): + if not exists_in_db(op.get_bind(), 'mailinglist', 'header_matches'): + # SQLite will not have deleted the former column, since it does not + # support column deletion. + op.add_column( + 'mailinglist', + sa.Column('header_matches', sa.PickleType, nullable=True)) + # Now migrate the data. It can't be offline because we need to read the + # pickles. + connection = op.get_bind() + # Don't import the table definition from the models, it may break this + # migration when the model is updated in the future (see the Alembic doc). + mlist_table = sa.sql.table( + 'mailinglist', + sa.sql.column('id', sa.Integer), + sa.sql.column('header_matches', sa.PickleType) + ) + header_match_table = sa.sql.table( + 'headermatch', + sa.sql.column('mailing_list_id', sa.Integer), + sa.sql.column('header', sa.Unicode), + sa.sql.column('pattern', sa.Unicode), + ) + for mlist_id, header, pattern in connection.execute( + header_match_table.select()).fetchall(): + mlist = connection.execute(mlist_table.select().where( + mlist_table.c.id == mlist_id)).fetchone() + header_matches = mlist['header_matches'] + if not header_matches: + header_matches = [] + header_matches.append((header, pattern)) + connection.execute(mlist_table.update().where( + mlist_table.c.id == mlist_id).values( + header_matches=header_matches)) + op.drop_table('headermatch') ===================================== src/mailman/database/tests/test_migrations.py ===================================== --- a/src/mailman/database/tests/test_migrations.py +++ b/src/mailman/database/tests/test_migrations.py @@ -28,6 +28,7 @@ import sqlalchemy as sa from mailman.config import config from mailman.database.alembic import alembic_cfg +from mailman.database.helpers import exists_in_db from mailman.database.model import Model from mailman.testing.layers import ConfigLayer @@ -41,17 +42,61 @@ class TestMigrations(unittest.TestCase): def tearDown(self): # Drop and restore a virgin database. + config.db.store.rollback() md = sa.MetaData(bind=config.db.engine) md.reflect() + # We have circular dependencies between user and address, thus we can't + # use drop_all() without getting a warning. Setting use_alter to True + # on the foreign keys helps SQLAlchemy mark those loops as known. + for tablename in ('user', 'address'): + if tablename not in md.tables: + continue + for fk in md.tables[tablename].foreign_keys: + fk.constraint.use_alter = True md.drop_all() Model.metadata.create_all(config.db.engine) def test_all_migrations(self): script_dir = alembic.script.ScriptDirectory.from_config(alembic_cfg) - revisions = [sc.revision for sc in - script_dir.walk_revisions('base', 'heads')] + revisions = [sc.revision for sc in script_dir.walk_revisions()] for revision in revisions: alembic.command.downgrade(alembic_cfg, revision) revisions.reverse() for revision in revisions: alembic.command.upgrade(alembic_cfg, revision) + + def test_42756496720_header_matches(self): + test_header_matches = [ + ('test-header-1', 'test-pattern-1'), + ('test-header-2', 'test-pattern-2'), + ('test-header-3', 'test-pattern-3'), + ] + mlist_table = sa.sql.table( + 'mailinglist', + sa.sql.column('id', sa.Integer), + sa.sql.column('header_matches', sa.PickleType) + ) + header_match_table = sa.sql.table( + 'headermatch', + sa.sql.column('mailing_list_id', sa.Integer), + sa.sql.column('header', sa.Unicode), + sa.sql.column('pattern', sa.Unicode), + ) + # Downgrading. + config.db.store.execute(mlist_table.insert().values(id=1)) + config.db.store.execute(header_match_table.insert().values( + [{'mailing_list_id': 1, 'header': hm[0], 'pattern': hm[1]} + for hm in test_header_matches])) + config.db.store.commit() + alembic.command.downgrade(alembic_cfg, '2bb9b382198') + results = config.db.store.execute( + mlist_table.select()).fetchall() + self.assertEqual(results[0].header_matches, test_header_matches) + self.assertFalse(exists_in_db(config.db.engine, 'headermatch')) + config.db.store.commit() + # Upgrading. + alembic.command.upgrade(alembic_cfg, '42756496720') + results = config.db.store.execute( + header_match_table.select()).fetchall() + self.assertEqual(results, + [(1, hm[0], hm[1]) for hm in test_header_matches]) ===================================== src/mailman/docs/NEWS.rst ===================================== --- a/src/mailman/docs/NEWS.rst +++ b/src/mailman/docs/NEWS.rst @@ -43,6 +43,11 @@ Bugs Configuration ------------- + * Mailing lists can now have their own header matching rules, although + site-defined rules still take precedence. Importing a Mailman 2.1 list + with header matching rules defined will create them in Mailman 3, albeit + with a few unsupported corner cases. Definition of new header matching + rules is not yet exposed through the REST API. Given by Aurélien Bompard. * The default languages from Mailman 2.1 have been ported over. Given by Aurélien Bompard. ===================================== src/mailman/interfaces/mailinglist.py ===================================== --- a/src/mailman/interfaces/mailinglist.py +++ b/src/mailman/interfaces/mailinglist.py @@ -20,6 +20,7 @@ __all__ = [ 'IAcceptableAlias', 'IAcceptableAliasSet', + 'IHeaderMatch', 'IListArchiver', 'IListArchiverSet', 'IMailingList', @@ -839,3 +840,61 @@ class IListArchiverSet(Interface): :return: the matching `IListArchiver` or None if the named archiver does not exist. """ + + + +class IHeaderMatch(Interface): + """A mailing list-specific message header matching rule.""" + + mailing_list = Attribute( + """The mailing list for the header match.""") + + header = Attribute( + """The email header that will be checked.""") + + pattern = Attribute( + """The regular expression to match.""") + + chain = Attribute( + """The chain to jump to on a match. + + If it is None, the `[antispam]jump_chain` action in the configuration + file is used. + """) + + +class IHeaderMatchSet(Interface): + """The set of header matching rules for a mailing list.""" + + def clear(): + """Clear the set of header matching rules.""" + + def add(header, pattern, chain=None): + """Add the given header matching rule to this mailing list's set. + + :param header: The email header to filter on. It will be converted to + lower case for consistency. + :type header: string + :param pattern: The regular expression to use. + :type pattern: string + :param chain: The chain to jump to, or None to use the site-wide + configuration. Defaults to None. + :type chain: string or None + :raises ValueError: if the header/pattern pair already exists for this + mailing list. + """ + + def remove(header, pattern): + """Remove the given header matching rule from this mailing list's set. + + :param header: The email header part of the rule to be removed. + :type header: string + :param pattern: The regular expression part of the rule to be removed. + :type pattern: string + """ + + def __iter__(): + """An iterator over all the IHeaderMatches defined in this set. + + :return: iterator over `IHeaderMatch`. + """ ===================================== src/mailman/model/mailinglist.py ===================================== --- a/src/mailman/model/mailinglist.py +++ b/src/mailman/model/mailinglist.py @@ -37,8 +37,9 @@ from mailman.interfaces.digests import DigestFrequency from mailman.interfaces.domain import IDomainManager from mailman.interfaces.languages import ILanguageManager from mailman.interfaces.mailinglist import ( - IAcceptableAlias, IAcceptableAliasSet, IListArchiver, IListArchiverSet, - IMailingList, Personalization, ReplyToMunging, SubscriptionPolicy) + IAcceptableAlias, IAcceptableAliasSet, IHeaderMatch, IHeaderMatchSet, + IListArchiver, IListArchiverSet, IMailingList, Personalization, + ReplyToMunging, SubscriptionPolicy) from mailman.interfaces.member import ( AlreadySubscribedError, MemberRole, MissingPreferredAddressError, SubscriptionEvent) @@ -57,6 +58,7 @@ from sqlalchemy import ( LargeBinary, PickleType, Unicode) from sqlalchemy.event import listen from sqlalchemy.orm import relationship +from sqlalchemy.orm.exc import NoResultFound from urllib.parse import urljoin from zope.component import getUtility from zope.event import notify @@ -149,7 +151,6 @@ class MailingList(Model): gateway_to_mail = Column(Boolean) gateway_to_news = Column(Boolean) goodbye_message_uri = Column(Unicode) - header_matches = Column(PickleType) header_uri = Column(Unicode) hold_these_nonmembers = Column(PickleType) info = Column(Unicode) @@ -621,3 +622,70 @@ class ListArchiverSet: return store.query(ListArchiver).filter( ListArchiver.mailing_list == self._mailing_list, ListArchiver.name == archiver_name).first() + + + +@implementer(IHeaderMatch) +class HeaderMatch(Model): + """See `IHeaderMatch`.""" + + __tablename__ = 'headermatch' + + id = Column(Integer, primary_key=True) + + mailing_list_id = Column(Integer, ForeignKey('mailinglist.id')) + mailing_list = relationship('MailingList', backref='header_matches') + + header = Column(Unicode) + pattern = Column(Unicode) + chain = Column(Unicode, nullable=True) + + + +@implementer(IHeaderMatchSet) +class HeaderMatchSet: + """See `IHeaderMatchSet`.""" + + def __init__(self, mailing_list): + self._mailing_list = mailing_list + + @dbconnection + def clear(self, store): + """See `IHeaderMatchSet`.""" + store.query(HeaderMatch).filter( + HeaderMatch.mailing_list == self._mailing_list).delete() + + @dbconnection + def add(self, store, header, pattern, chain=None): + header = header.lower() + existing = store.query(HeaderMatch).filter( + HeaderMatch.mailing_list == self._mailing_list, + HeaderMatch.header == header, + HeaderMatch.pattern == pattern).count() + if existing > 0: + raise ValueError('Pattern already exists') + header_match = HeaderMatch( + mailing_list=self._mailing_list, + header=header, pattern=pattern, chain=chain) + store.add(header_match) + + @dbconnection + def remove(self, store, header, pattern): + header = header.lower() + # Don't just filter and use delete(), or the MailingList.header_matches + # collection will not be updated: + # http://docs.sqlalchemy.org/en/rel_1_0/orm/collections.html#dynamic-relationship-loaders + try: + existing = store.query(HeaderMatch).filter( + HeaderMatch.mailing_list == self._mailing_list, + HeaderMatch.header == header, + HeaderMatch.pattern == pattern).one() + except NoResultFound: + raise ValueError('Pattern does not exist') + else: + self._mailing_list.header_matches.remove(existing) + + @dbconnection + def __iter__(self, store): + yield from store.query(HeaderMatch).filter( + HeaderMatch.mailing_list == self._mailing_list) ===================================== src/mailman/model/tests/test_mailinglist.py ===================================== --- a/src/mailman/model/tests/test_mailinglist.py +++ b/src/mailman/model/tests/test_mailinglist.py @@ -32,7 +32,7 @@ from mailman.config import config from mailman.database.transaction import transaction from mailman.interfaces.listmanager import IListManager from mailman.interfaces.mailinglist import ( - IAcceptableAliasSet, IListArchiverSet) + IAcceptableAliasSet, IHeaderMatchSet, IListArchiverSet) from mailman.interfaces.member import ( AlreadySubscribedError, MemberRole, MissingPreferredAddressError) from mailman.interfaces.usermanager import IUserManager @@ -163,3 +163,58 @@ class TestAcceptableAliases(unittest.TestCase): self.assertEqual(['b...@example.com'], list(alias_set.aliases)) getUtility(IListManager).delete(self._mlist) self.assertEqual(len(list(alias_set.aliases)), 0) + + + +class TestHeaderMatch(unittest.TestCase): + layer = ConfigLayer + + def setUp(self): + self._mlist = create_list('a...@example.com') + + def test_lowercase_header(self): + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Header', 'pattern') + self.assertEqual(len(self._mlist.header_matches), 1) + self.assertEqual(self._mlist.header_matches[0].header, 'header') + + def test_chain_defaults_to_none(self): + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('header', 'pattern') + self.assertEqual(len(self._mlist.header_matches), 1) + self.assertEqual(self._mlist.header_matches[0].chain, None) + + def test_duplicate(self): + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Header', 'pattern') + self.assertRaises( + ValueError, header_matches.add, 'Header', 'pattern') + self.assertEqual(len(self._mlist.header_matches), 1) + + def test_remove_non_existent(self): + header_matches = IHeaderMatchSet(self._mlist) + self.assertRaises( + ValueError, header_matches.remove, 'header', 'pattern') + + def test_add_remove(self): + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('header', 'pattern') + self.assertEqual(len(self._mlist.header_matches), 1) + header_matches.remove('header', 'pattern') + self.assertEqual(len(self._mlist.header_matches), 0) + + def test_iterator(self): + header_matches = IHeaderMatchSet(self._mlist) + header_matches.add('Header', 'pattern') + header_matches.add('Subject', 'patt.*') + header_matches.add('From', '.*@example.com', 'discard') + header_matches.add('From', '.*@example.org', 'accept') + matches = sorted((match.header, match.pattern, match.chain) + for match in IHeaderMatchSet(self._mlist)) + self.assertEqual( + matches, + [('from', '.*@example.com', 'discard'), + ('from', '.*@example.org', 'accept'), + ('header', 'pattern', None), + ('subject', 'patt.*', None), + ]) ===================================== src/mailman/rules/docs/header-matching.rst ===================================== --- a/src/mailman/rules/docs/header-matching.rst +++ b/src/mailman/rules/docs/header-matching.rst @@ -119,13 +119,21 @@ List-specific header matching ============================= Each mailing list can also be configured with a set of header matching regular -expression rules. These are used to impose list-specific header filtering -with the same semantics as the global `[antispam]` section. +expression rules. These can be used to impose list-specific header filtering +with the same semantics as the global ``[antispam]`` section, or to have a +different action. + +To follow the global antispam action, the header match rule must not specify a +``chain`` to jump to. If the default antispam action is changed in the +configuration file and Mailman is restarted, those rules will get the new jump +action. The list administrator wants to match not on four stars, but on three plus signs, but only for the current mailing list. - >>> mlist.header_matches = [('x-spam-score', '[+]{3,}')] + >>> from mailman.interfaces.mailinglist import IHeaderMatchSet + >>> header_matches = IHeaderMatchSet(mlist) + >>> header_matches.add('x-spam-score', '[+]{3,}') A message with a spam score of two pluses does not match. @@ -139,8 +147,8 @@ A message with a spam score of two pluses does not match. x-spam-score: [+]{3,} But a message with a spam score of three pluses does match. Because a message -with the previous Message-Id is already in the moderation queue, we need to -give this message a new Message-Id. +with the previous ``Message-Id`` is already in the moderation queue, we need +to give this message a new ``Message-Id``. >>> msgdata = {} >>> del msg['x-spam-score'] @@ -165,3 +173,25 @@ As does a message with a spam score of four pluses. Rule hits: x-spam-score: [+]{3,} No rules missed + +Now, the list administrator wants to match on three plus signs, but wants +those emails to be discarded instead of held. + + >>> header_matches.remove('x-spam-score', '[+]{3,}') + >>> header_matches.add('x-spam-score', '[+]{3,}', 'discard') + +A message with a spam score of three pluses will still match, and the message +will be discarded. + + >>> msgdata = {} + >>> del msg['x-spam-score'] + >>> msg['X-Spam-Score'] = '+++' + >>> del msg['message-id'] + >>> msg['Message-Id'] = '<dog>' + >>> with event_subscribers(handler): + ... process(mlist, msg, msgdata, 'header-match') + DiscardEvent discard <dog> + >>> hits_and_misses(msgdata) + Rule hits: + x-spam-score: [+]{3,} + No rules missed ===================================== src/mailman/utilities/importer.py ===================================== --- a/src/mailman/utilities/importer.py +++ b/src/mailman/utilities/importer.py @@ -24,8 +24,10 @@ __all__ = [ import os +import re import sys import codecs +import logging import datetime from mailman.config import config @@ -39,7 +41,7 @@ from mailman.interfaces.bans import IBanManager from mailman.interfaces.bounce import UnrecognizedBounceDisposition from mailman.interfaces.digests import DigestFrequency from mailman.interfaces.languages import ILanguageManager -from mailman.interfaces.mailinglist import IAcceptableAliasSet +from mailman.interfaces.mailinglist import IAcceptableAliasSet, IHeaderMatchSet from mailman.interfaces.mailinglist import Personalization, ReplyToMunging from mailman.interfaces.mailinglist import SubscriptionPolicy from mailman.interfaces.member import DeliveryMode, DeliveryStatus, MemberRole @@ -51,6 +53,8 @@ from sqlalchemy import Boolean from urllib.error import URLError from zope.component import getUtility +log = logging.getLogger('mailman.error') + class Import21Error(MailmanError): @@ -124,6 +128,24 @@ def nonmember_action_mapping(value): 3: Action.discard, }[value] + +def action_to_chain(value): + # Converts an action number in Mailman 2.1 to the name of the corresponding + # chain in 3.x. The actions 'approve', 'subscribe' and 'unsubscribe' are + # ignored. The defer action is converted to None, because it is not + # a jump to a terminal chain. + return { + 0: None, + #1: 'approve', + 2: 'reject', + 3: 'discard', + #4: 'subscribe', + #5: 'unsubscribe', + 6: 'accept', + 7: 'hold', + }[value] + + def check_language_code(code): if code is None: @@ -310,6 +332,53 @@ def import_config_pck(mlist, config_dict): # When .add() rejects this, the line probably contains a regular # expression. Make that explicit for MM3. alias_set.add('^' + address) + # Handle header_filter_rules conversion to header_matches. + header_match_set = IHeaderMatchSet(mlist) + header_filter_rules = config_dict.get('header_filter_rules', []) + for line_patterns, action, _unused in header_filter_rules: + try: + chain = action_to_chain(action) + except KeyError: + log.warning('Unsupported header_filter_rules action: %r', + action) + continue + # Now split the line into a header and a pattern. + for line_pattern in line_patterns.splitlines(): + if len(line_pattern.strip()) == 0: + continue + for sep in (': ', ':.', ':'): + header, sep, pattern = line_pattern.partition(sep) + if sep: + # We found it. + break + else: + # Matches any header, which is not supported. XXX + log.warning('Unsupported header_filter_rules pattern: %r', + line_pattern) + continue + header = header.strip().lstrip('^').lower() + header = header.replace('\\', '') + if not header: + log.warning( + 'Cannot parse the header in header_filter_rule: %r', + line_pattern) + continue + if len(pattern) == 0: + # The line matched only the header, therefore the header can + # be anything. + pattern = '.*' + try: + re.compile(pattern) + except re.error: + log.warning('Skipping header_filter rule because of an ' + 'invalid regular expression: %r', line_pattern) + continue + try: + header_match_set.add(header, pattern, chain) + except ValueError: + log.warning('Skipping duplicate header_filter rule: %r', + line_pattern) + continue # Handle conversion to URIs. In MM2.1, the decorations are strings # containing placeholders, and there's no provision for language-specific # templates. In MM3, template locations are specified by URLs with the ===================================== src/mailman/utilities/tests/test_import.py ===================================== --- a/src/mailman/utilities/tests/test_import.py +++ b/src/mailman/utilities/tests/test_import.py @@ -49,6 +49,7 @@ from mailman.interfaces.member import DeliveryMode, DeliveryStatus from mailman.interfaces.nntp import NewsgroupModeration from mailman.interfaces.templates import ITemplateLoader from mailman.interfaces.usermanager import IUserManager +from mailman.testing.helpers import LogFileMark from mailman.testing.layers import ConfigLayer from mailman.utilities.filesystem import makedirs from mailman.utilities.importer import import_config_pck, Import21Error @@ -330,6 +331,157 @@ class TestBasicImport(unittest.TestCase): self.assertEqual(self._mlist.subscription_policy, SubscriptionPolicy.confirm_then_moderate) + def test_header_matches(self): + # This test contail real cases of header_filter_rules + self._pckdict['header_filter_rules'] = [ + ('X\\-Spam\\-Status\\: Yes.*', 3, False), + ('^X-Spam-Status: Yes\r\n\r\n', 2, False), + ('^X-Spam-Level: \\*\\*\\*.*$', 3, False), + ('^X-Spam-Level:.\\*\\*\r\n^X-Spam:.\\Yes', 3, False), + ('Subject: \\[SPAM\\].*', 3, False), + ('^Subject: .*loan.*', 3, False), + ('Original-Received: from *linkedin.com*\r\n', 3, False), + ('X-Git-Module: rhq.*git', 6, False), + ('Approved: verysecretpassword', 6, False), + ('^Subject: dev-\r\n^Subject: staging-', 3, False), + ('from: .*i...@aolanchem.com\r\nfrom: .*@jw-express.com', + 2, False), + ('^Received: from smtp-.*\\.fedoraproject\\.org\r\n' + '^Received: from mx.*\\.redhat.com\r\n' + '^Resent-date:\r\n' + '^Resent-from:\r\n' + '^Resent-Message-ID:\r\n' + '^Resent-to:\r\n' + '^Subject: [^mtv]\r\n', + 7, False), + ('^Received: from fedorahosted\\.org.*by fedorahosted\\.org\r\n' + '^Received: from hosted.*\\.fedoraproject.org.*by ' + 'hosted.*\\.fedoraproject\\.org\r\n' + '^Received: from hosted.*\\.fedoraproject.org.*by ' + 'fedoraproject\\.org\r\n' + '^Received: from hosted.*\\.fedoraproject.org.*by ' + 'fedorahosted\\.org', + 6, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual( + [(hm.header, hm.pattern, hm.chain) + for hm in self._mlist.header_matches ], [ + ('x-spam-status', 'Yes.*', 'discard'), + ('x-spam-status', 'Yes', 'reject'), + ('x-spam-level', '\\*\\*\\*.*$', 'discard'), + ('x-spam-level', '\\*\\*', 'discard'), + ('x-spam', '\\Yes', 'discard'), + ('subject', '\\[SPAM\\].*', 'discard'), + ('subject', '.*loan.*', 'discard'), + ('original-received', 'from *linkedin.com*', 'discard'), + ('x-git-module', 'rhq.*git', 'accept'), + ('approved', 'verysecretpassword', 'accept'), + ('subject', 'dev-', 'discard'), + ('subject', 'staging-', 'discard'), + ('from', '.*i...@aolanchem.com', 'reject'), + ('from', '.*@jw-express.com', 'reject'), + ('received', 'from smtp-.*\\.fedoraproject\\.org', 'hold'), + ('received', 'from mx.*\\.redhat.com', 'hold'), + ('resent-date', '.*', 'hold'), + ('resent-from', '.*', 'hold'), + ('resent-message-id', '.*', 'hold'), + ('resent-to', '.*', 'hold'), + ('subject', '[^mtv]', 'hold'), + ('received', 'from fedorahosted\\.org.*by fedorahosted\\.org', + 'accept'), + ('received', + 'from hosted.*\\.fedoraproject.org.*by ' + 'hosted.*\\.fedoraproject\\.org', 'accept'), + ('received', + 'from hosted.*\\.fedoraproject.org.*by ' + 'fedoraproject\\.org', 'accept'), + ('received', + 'from hosted.*\\.fedoraproject.org.*by ' + 'fedorahosted\\.org', 'accept'), + ]) + loglines = error_log.read().strip() + self.assertEqual(len(loglines), 0) + + def test_header_matches_header_only(self): + # Check that an empty pattern is skipped. + self._pckdict['header_filter_rules'] = [ + ('SomeHeaderName', 3, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual(self._mlist.header_matches, []) + self.assertIn('Unsupported header_filter_rules pattern', + error_log.readline()) + + def test_header_matches_anything(self): + # Check that a wild card header pattern is skipped. + self._pckdict['header_filter_rules'] = [ + ('.*', 7, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual(self._mlist.header_matches, []) + self.assertIn('Unsupported header_filter_rules pattern', + error_log.readline()) + + def test_header_matches_invalid_re(self): + # Check that an invalid regular expression pattern is skipped. + self._pckdict['header_filter_rules'] = [ + ('SomeHeaderName: *invalid-re', 3, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual(self._mlist.header_matches, []) + self.assertIn('Skipping header_filter rule because of an invalid ' + 'regular expression', error_log.readline()) + + def test_header_matches_defer(self): + # Check that a defer action is properly converted. + self._pckdict['header_filter_rules'] = [ + ('^X-Spam-Status: Yes', 0, False), + ] + self._import() + self.assertListEqual( + [(hm.header, hm.pattern, hm.chain) + for hm in self._mlist.header_matches], + [('x-spam-status', 'Yes', None)] + ) + + def test_header_matches_unsupported_action(self): + # Check that unsupported actions are skipped. + for action_num in (1, 4, 5): + self._pckdict['header_filter_rules'] = [ + ('HeaderName: test-re', action_num, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual(self._mlist.header_matches, []) + self.assertIn('Unsupported header_filter_rules action', + error_log.readline()) + # Avoid a useless warning. + for member in self._mlist.members.members: + member.unsubscribe() + for member in self._mlist.owners.members: + member.unsubscribe() + + def test_header_matches_duplicate(self): + # Check that duplicate patterns don't cause tracebacks. + self._pckdict['header_filter_rules'] = [ + ('SomeHeaderName: test-pattern', 3, False), + ('SomeHeaderName: test-pattern', 2, False), + ] + error_log = LogFileMark('mailman.error') + self._import() + self.assertListEqual( + [(hm.header, hm.pattern, hm.chain) + for hm in self._mlist.header_matches], + [('someheadername', 'test-pattern', 'discard')] + ) + self.assertIn('Skipping duplicate header_filter rule', + error_log.readline()) + class TestArchiveImport(unittest.TestCase): View it on GitLab: https://gitlab.com/mailman/mailman/compare/49d17bc04386293b3f659e24070f618f5f1b3b05...724b7cee7ed92a8107733cdef2906ef9c0d69f56
_______________________________________________ Mailman-checkins mailing list Mailman-checkins@python.org Unsubscribe: https://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org