Repository: allura Updated Branches: refs/heads/master f597b2e67 -> 6742f0f15
[#8192] StopForumSpam filter, and chained spam filters Project: http://git-wip-us.apache.org/repos/asf/allura/repo Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/407559b0 Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/407559b0 Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/407559b0 Branch: refs/heads/master Commit: 407559b02d77284b839e3ea9a26ef48dc1193c32 Parents: f597b2e Author: Dave Brondsema <d...@brondsema.net> Authored: Thu Mar 1 15:33:12 2018 -0500 Committer: Kenton Taylor <ktay...@slashdotmedia.com> Committed: Wed Mar 7 14:38:33 2018 -0500 ---------------------------------------------------------------------- Allura/allura/lib/app_globals.py | 3 +- Allura/allura/lib/spam/__init__.py | 51 +++++++++++++--- Allura/allura/lib/spam/akismetfilter.py | 9 +-- Allura/allura/lib/spam/mollomfilter.py | 9 +-- Allura/allura/lib/spam/stopforumspamfilter.py | 63 ++++++++++++++++++++ .../allura/tests/unit/spam/test_spam_filter.py | 30 +++++++++- .../tests/unit/spam/test_stopforumspam.py | 51 ++++++++++++++++ Allura/development.ini | 5 +- Allura/setup.py | 1 + 9 files changed, 197 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/app_globals.py ---------------------------------------------------------------------- diff --git a/Allura/allura/lib/app_globals.py b/Allura/allura/lib/app_globals.py index 5453055..fc06acc 100644 --- a/Allura/allura/lib/app_globals.py +++ b/Allura/allura/lib/app_globals.py @@ -256,7 +256,8 @@ class Globals(object): value = ep.load() except Exception: log.exception('Could not load entry point [%s] %s', section_name, ep) - d[ep.name] = value + else: + d[ep.name] = value return d class entry_point_loading_dict(dict): http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/__init__.py ---------------------------------------------------------------------- diff --git a/Allura/allura/lib/spam/__init__.py b/Allura/allura/lib/spam/__init__.py index a4405e3..aaa4412 100644 --- a/Allura/allura/lib/spam/__init__.py +++ b/Allura/allura/lib/spam/__init__.py @@ -16,6 +16,9 @@ # under the License. import logging +from copy import copy + +from paste.deploy.converters import aslist from allura.lib.helpers import exceptionless from allura.model.artifact import SpamCheckResult @@ -30,20 +33,23 @@ class SpamFilter(object): def __init__(self, config): pass + @property + def filter_name(self): + return self.__class__.__name__.replace('SpamFilter', '').lower() + def check(self, text, artifact=None, user=None, content_type='comment', **kw): """Return True if ``text`` is spam, else False.""" log.info("No spam checking enabled") return False def submit_spam(self, text, artifact=None, user=None, content_type='comment', **kw): - log.info("No spam checking enabled") + log.info("No submit_spam available for %s", self.filter_name) def submit_ham(self, text, artifact=None, user=None, content_type='comment', **kw): - log.info("No spam checking enabled") + log.info("No submit_ham available for %s", self.filter_name) def record_result(self, result, artifact, user): - filter_name = self.__class__.__name__.replace('SpamFilter', '').lower() - log.info("spam=%s (%s): %s" % (str(result), filter_name, artifact.url() if artifact else '')) + log.info("spam=%s (%s): %s" % (str(result), self.filter_name, artifact.url() if artifact else '')) r = SpamCheckResult( ref=artifact.ref if artifact else None, project_id=artifact.project_id if artifact else None, @@ -60,7 +66,36 @@ class SpamFilter(object): method = config.get('spam.method') if not method: return cls(config) - result = entry_points[method] - filter_obj = result(config) - filter_obj.check = exceptionless(False, log=log)(filter_obj.check) - return filter_obj + elif ' ' in method: + return ChainedSpamFilter(method, entry_points, config) + else: + result = entry_points[method] + filter_obj = result(config) + filter_obj.check = exceptionless(False, log=log)(filter_obj.check) + return filter_obj + + +class ChainedSpamFilter(SpamFilter): + + def __init__(self, methods_string, entry_points, config): + methods = aslist(methods_string) + self.filters = [] + for m in methods: + config = copy(config) + config['spam.method'] = m + spam_filter = SpamFilter.get(config=config, entry_points=entry_points) + self.filters.append(spam_filter) + + def check(self, *a, **kw): + for spam_filter in self.filters: + if spam_filter.check(*a, **kw): + return True + return False + + def submit_spam(self, *a, **kw): + for spam_filter in self.filters: + spam_filter.submit_spam(*a, **kw) + + def submit_ham(self, *a, **kw): + for spam_filter in self.filters: + spam_filter.submit_ham(*a, **kw) http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/akismetfilter.py ---------------------------------------------------------------------- diff --git a/Allura/allura/lib/spam/akismetfilter.py b/Allura/allura/lib/spam/akismetfilter.py index 7a87bd9..26cccbd 100644 --- a/Allura/allura/lib/spam/akismetfilter.py +++ b/Allura/allura/lib/spam/akismetfilter.py @@ -38,13 +38,8 @@ class AkismetSpamFilter(SpamFilter): """Spam checking implementation via Akismet service. - To enable Akismet spam filtering in your Allura instance, first - enable the entry point in setup.py:: - - [allura.spam] - akismet = allura.lib.spam.akismetfilter:AkismetSpamFilter - - Then include the following parameters in your .ini file:: + To enable Akismet spam filtering in your Allura instance, first pip install akismet (see requirements-optional.txt) + and then include the following parameters in your .ini file:: spam.method = akismet spam.key = <your Akismet key here> http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/mollomfilter.py ---------------------------------------------------------------------- diff --git a/Allura/allura/lib/spam/mollomfilter.py b/Allura/allura/lib/spam/mollomfilter.py index 533186b..f2d6b68 100644 --- a/Allura/allura/lib/spam/mollomfilter.py +++ b/Allura/allura/lib/spam/mollomfilter.py @@ -38,13 +38,8 @@ class MollomSpamFilter(SpamFilter): """Spam checking implementation via Mollom service. - To enable Mollom spam filtering in your Allura instance, first - enable the entry point in setup.py:: - - [allura.spam] - mollom = allura.lib.spam.mollomfilter:MollomSpamFilter - - Then include the following parameters in your .ini file:: + To enable Mollom spam filtering in your Allura instance, first pip install PyMollom (see requirements-optional.txt) + and then include the following parameters in your .ini file:: spam.method = mollom spam.public_key = <your Mollom public key here> http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/stopforumspamfilter.py ---------------------------------------------------------------------- diff --git a/Allura/allura/lib/spam/stopforumspamfilter.py b/Allura/allura/lib/spam/stopforumspamfilter.py new file mode 100644 index 0000000..0e263be --- /dev/null +++ b/Allura/allura/lib/spam/stopforumspamfilter.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import logging +import csv +from sys import getsizeof + +import ipaddress +from pylons import request + +from allura.lib import utils +from allura.lib.spam import SpamFilter + +log = logging.getLogger(__name__) + + +class StopForumSpamSpamFilter(SpamFilter): + """Spam checking by IP address, using StopForumSpam data files + + To enable StopForumSpam spam filtering in your Allura instance, + include the following parameters in your .ini file:: + + spam.method = stopforumspam + spam.stopforumspam.ip_addr_file = /path/to/listed_ip_180_all.txt + + Of course you'll need to download the file from https://www.stopforumspam.com/downloads and do so on a regular + basis to keep it updated. + """ + + def __init__(self, config): + self.packed_ips = set() + with open(config['spam.stopforumspam.ip_addr_file']) as f: + csv_file = csv.reader(f) + for record in csv_file: + if int(record[1]) > int(config.get('spam.stopforumspam.threshold', 20)): + ip = record[0] + # int is the smallest memory representation of an IP addr + ip_int = int(ipaddress.ip_address(unicode(ip))) + self.packed_ips.add(ip_int) + # to get actual memory usage, use: from pympler.asizeof import asizeof + log.info('Read stopforumspam file; %s recs, probably %s bytes stored in memory', len(self.packed_ips), + len(self.packed_ips) * getsizeof(iter(self.packed_ips).next()) * 2) + + def check(self, text, artifact=None, user=None, content_type='comment', **kw): + ip = utils.ip_address(request) + ip_int = int(ipaddress.ip_address(unicode(ip))) + res = ip_int in self.packed_ips + self.record_result(res, artifact, user) + return res http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/tests/unit/spam/test_spam_filter.py ---------------------------------------------------------------------- diff --git a/Allura/allura/tests/unit/spam/test_spam_filter.py b/Allura/allura/tests/unit/spam/test_spam_filter.py index 9d603f8..d05fa97 100644 --- a/Allura/allura/tests/unit/spam/test_spam_filter.py +++ b/Allura/allura/tests/unit/spam/test_spam_filter.py @@ -23,7 +23,7 @@ import unittest from ming.odm import ThreadLocalORMSession from nose.tools import assert_equal -from allura.lib.spam import SpamFilter +from allura.lib.spam import SpamFilter, ChainedSpamFilter from allura import model as M from allura.model.artifact import SpamCheckResult from alluratest.controller import setup_basic_test @@ -32,8 +32,19 @@ from forgewiki import model as WM class MockFilter(SpamFilter): + def __init__(self, config): + self.config = config + def check(*args, **kw): raise Exception("test exception") + + +class MockFilter2(SpamFilter): + + def __init__(self, config): + self.config = config + + def check(*args, **kw): return True @@ -82,3 +93,20 @@ class TestSpamFilterFunctional(object): assert_equal(len(results), 1) assert_equal(results[0].result, True) assert_equal(results[0].user.username, 'test-user') + + +class TestChainedSpamFilter(object): + + def test(self): + config = {'spam.method': 'mock1 mock2', 'spam.settingA': 'bcd'} + entry_points = {'mock1': MockFilter, 'mock2': MockFilter2} + checker = SpamFilter.get(config, entry_points) + assert isinstance(checker, ChainedSpamFilter) + assert len(checker.filters) == 2, checker.filters + assert_equal(checker.filters[0].config, {'spam.method': 'mock1', 'spam.settingA': 'bcd'}) + assert_equal(checker.filters[1].config, {'spam.method': 'mock2', 'spam.settingA': 'bcd'}) + + assert checker.check() # True because first filter errors out, and 2nd returns True + + checker.submit_spam('some text') + checker.submit_ham('some text') http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/tests/unit/spam/test_stopforumspam.py ---------------------------------------------------------------------- diff --git a/Allura/allura/tests/unit/spam/test_stopforumspam.py b/Allura/allura/tests/unit/spam/test_stopforumspam.py new file mode 100644 index 0000000..64963cf --- /dev/null +++ b/Allura/allura/tests/unit/spam/test_stopforumspam.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import tempfile + +import mock + +from bson import ObjectId +from nose.tools import assert_equal + +from allura.lib.spam.stopforumspamfilter import StopForumSpamSpamFilter + + +class TestStopForumSpam(object): + + def setUp(self): + self.content = u'spåm text' + + self.artifact = mock.Mock() + self.artifact.project_id = ObjectId() + self.artifact.ref = None + + with tempfile.NamedTemporaryFile() as f: + f.write('''"1.1.1.1","2","2018-01-22 10:56:29" +"1.2.3.4","42","2017-09-24 18:33:00" +"4.3.2.1","1","2017-09-28 14:03:53"''') + f.flush() + self.sfs = StopForumSpamSpamFilter({'spam.stopforumspam.ip_addr_file': f.name}) + + @mock.patch('allura.lib.spam.stopforumspamfilter.request') + def test_check(self, request): + request.remote_addr = '1.2.3.4' + assert_equal(True, self.sfs.check(self.content, artifact=self.artifact)) + + request.remote_addr = '1.1.1.1' + assert_equal(False, self.sfs.check(self.content, artifact=self.artifact)) http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/development.ini ---------------------------------------------------------------------- diff --git a/Allura/development.ini b/Allura/development.ini index 4a7b9d9..d3f2700 100644 --- a/Allura/development.ini +++ b/Allura/development.ini @@ -237,13 +237,16 @@ user_prefs.maximum_claimed_emails = 20 site_admin_project = allura site_admin_project_nbhd = Projects -; Spam filtering service: mollom or akismet +; Spam filtering service: this can be one or more of: mollom akismet stopforumspam ;spam.method = akismet ; for akismet: ;spam.key = ; for mollom: ;spam.public_key = ;spam.private_key = +; for stopforumspam, should be a listed_ip_*_all.txt file +;spam.stopforumspam.ip_addr_file = +;spam.stopforumspam.threshold = 20 ; Phone verification service: Nexmo Verify ; phone.method = nexmo http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/setup.py ---------------------------------------------------------------------- diff --git a/Allura/setup.py b/Allura/setup.py index 3005d07..30bca7d 100644 --- a/Allura/setup.py +++ b/Allura/setup.py @@ -106,6 +106,7 @@ setup( [allura.spam] akismet = allura.lib.spam.akismetfilter:AkismetSpamFilter mollom = allura.lib.spam.mollomfilter:MollomSpamFilter + stopforumspam = allura.lib.spam.stopforumspamfilter:StopForumSpamSpamFilter [allura.phone] nexmo = allura.lib.phone.nexmo:NexmoPhoneService