Repository: allura
Updated Branches:
  refs/heads/master f597b2e67 -> 6742f0f15


[#8192] StopForumSpam filter, and chained spam filters


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/407559b0
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/407559b0
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/407559b0

Branch: refs/heads/master
Commit: 407559b02d77284b839e3ea9a26ef48dc1193c32
Parents: f597b2e
Author: Dave Brondsema <d...@brondsema.net>
Authored: Thu Mar 1 15:33:12 2018 -0500
Committer: Kenton Taylor <ktay...@slashdotmedia.com>
Committed: Wed Mar 7 14:38:33 2018 -0500

----------------------------------------------------------------------
 Allura/allura/lib/app_globals.py                |  3 +-
 Allura/allura/lib/spam/__init__.py              | 51 +++++++++++++---
 Allura/allura/lib/spam/akismetfilter.py         |  9 +--
 Allura/allura/lib/spam/mollomfilter.py          |  9 +--
 Allura/allura/lib/spam/stopforumspamfilter.py   | 63 ++++++++++++++++++++
 .../allura/tests/unit/spam/test_spam_filter.py  | 30 +++++++++-
 .../tests/unit/spam/test_stopforumspam.py       | 51 ++++++++++++++++
 Allura/development.ini                          |  5 +-
 Allura/setup.py                                 |  1 +
 9 files changed, 197 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/app_globals.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/app_globals.py b/Allura/allura/lib/app_globals.py
index 5453055..fc06acc 100644
--- a/Allura/allura/lib/app_globals.py
+++ b/Allura/allura/lib/app_globals.py
@@ -256,7 +256,8 @@ class Globals(object):
                     value = ep.load()
                 except Exception:
                     log.exception('Could not load entry point [%s] %s', section_name, ep)
-                d[ep.name] = value
+                else:
+                    d[ep.name] = value
             return d
 
         class entry_point_loading_dict(dict):

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/__init__.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/spam/__init__.py b/Allura/allura/lib/spam/__init__.py
index a4405e3..aaa4412 100644
--- a/Allura/allura/lib/spam/__init__.py
+++ b/Allura/allura/lib/spam/__init__.py
@@ -16,6 +16,9 @@
 #       under the License.
 
 import logging
+from copy import copy
+
+from paste.deploy.converters import aslist
 
 from allura.lib.helpers import exceptionless
 from allura.model.artifact import SpamCheckResult
@@ -30,20 +33,23 @@ class SpamFilter(object):
     def __init__(self, config):
         pass
 
+    @property
+    def filter_name(self):
+        return self.__class__.__name__.replace('SpamFilter', '').lower()
+
     def check(self, text, artifact=None, user=None, content_type='comment', **kw):
         """Return True if ``text`` is spam, else False."""
         log.info("No spam checking enabled")
         return False
 
     def submit_spam(self, text, artifact=None, user=None, content_type='comment', **kw):
-        log.info("No spam checking enabled")
+        log.info("No submit_spam available for %s", self.filter_name)
 
     def submit_ham(self, text, artifact=None, user=None, content_type='comment', **kw):
-        log.info("No spam checking enabled")
+        log.info("No submit_ham available for %s", self.filter_name)
 
     def record_result(self, result, artifact, user):
-        filter_name = self.__class__.__name__.replace('SpamFilter', '').lower()
-        log.info("spam=%s (%s): %s" % (str(result), filter_name, artifact.url() if artifact else ''))
+        log.info("spam=%s (%s): %s" % (str(result), self.filter_name, artifact.url() if artifact else ''))
         r = SpamCheckResult(
             ref=artifact.ref if artifact else None,
             project_id=artifact.project_id if artifact else None,
@@ -60,7 +66,36 @@ class SpamFilter(object):
         method = config.get('spam.method')
         if not method:
             return cls(config)
-        result = entry_points[method]
-        filter_obj = result(config)
-        filter_obj.check = exceptionless(False, log=log)(filter_obj.check)
-        return filter_obj
+        elif ' ' in method:
+            return ChainedSpamFilter(method, entry_points, config)
+        else:
+            result = entry_points[method]
+            filter_obj = result(config)
+            filter_obj.check = exceptionless(False, log=log)(filter_obj.check)
+            return filter_obj
+
+
+class ChainedSpamFilter(SpamFilter):
+
+    def __init__(self, methods_string, entry_points, config):
+        methods = aslist(methods_string)
+        self.filters = []
+        for m in methods:
+            config = copy(config)
+            config['spam.method'] = m
+            spam_filter = SpamFilter.get(config=config, entry_points=entry_points)
+            self.filters.append(spam_filter)
+
+    def check(self, *a, **kw):
+        for spam_filter in self.filters:
+            if spam_filter.check(*a, **kw):
+                return True
+        return False
+
+    def submit_spam(self, *a, **kw):
+        for spam_filter in self.filters:
+            spam_filter.submit_spam(*a, **kw)
+
+    def submit_ham(self, *a, **kw):
+        for spam_filter in self.filters:
+            spam_filter.submit_ham(*a, **kw)

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/akismetfilter.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/spam/akismetfilter.py b/Allura/allura/lib/spam/akismetfilter.py
index 7a87bd9..26cccbd 100644
--- a/Allura/allura/lib/spam/akismetfilter.py
+++ b/Allura/allura/lib/spam/akismetfilter.py
@@ -38,13 +38,8 @@ class AkismetSpamFilter(SpamFilter):
 
     """Spam checking implementation via Akismet service.
 
-    To enable Akismet spam filtering in your Allura instance, first
-    enable the entry point in setup.py::
-
-        [allura.spam]
-        akismet = allura.lib.spam.akismetfilter:AkismetSpamFilter
-
-    Then include the following parameters in your .ini file::
+    To enable Akismet spam filtering in your Allura instance, first pip install akismet (see requirements-optional.txt)
+    and then include the following parameters in your .ini file::
 
         spam.method = akismet
         spam.key = <your Akismet key here>

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/mollomfilter.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/spam/mollomfilter.py b/Allura/allura/lib/spam/mollomfilter.py
index 533186b..f2d6b68 100644
--- a/Allura/allura/lib/spam/mollomfilter.py
+++ b/Allura/allura/lib/spam/mollomfilter.py
@@ -38,13 +38,8 @@ class MollomSpamFilter(SpamFilter):
 
     """Spam checking implementation via Mollom service.
 
-    To enable Mollom spam filtering in your Allura instance, first
-    enable the entry point in setup.py::
-
-        [allura.spam]
-        mollom = allura.lib.spam.mollomfilter:MollomSpamFilter
-
-    Then include the following parameters in your .ini file::
+    To enable Mollom spam filtering in your Allura instance, first pip install PyMollom (see requirements-optional.txt)
+    and then include the following parameters in your .ini file::
 
         spam.method = mollom
         spam.public_key = <your Mollom public key here>

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/lib/spam/stopforumspamfilter.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/spam/stopforumspamfilter.py b/Allura/allura/lib/spam/stopforumspamfilter.py
new file mode 100644
index 0000000..0e263be
--- /dev/null
+++ b/Allura/allura/lib/spam/stopforumspamfilter.py
@@ -0,0 +1,63 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+import logging
+import csv
+from sys import getsizeof
+
+import ipaddress
+from pylons import request
+
+from allura.lib import utils
+from allura.lib.spam import SpamFilter
+
+log = logging.getLogger(__name__)
+
+
+class StopForumSpamSpamFilter(SpamFilter):
+    """Spam checking by IP address, using StopForumSpam data files
+
+    To enable StopForumSpam spam filtering in your Allura instance,
+    include the following parameters in your .ini file::
+
+        spam.method = stopforumspam
+        spam.stopforumspam.ip_addr_file = /path/to/listed_ip_180_all.txt
+
+    Of course you'll need to download the file from https://www.stopforumspam.com/downloads and do so on a regular
+    basis to keep it updated.
+    """
+
+    def __init__(self, config):
+        self.packed_ips = set()
+        with open(config['spam.stopforumspam.ip_addr_file']) as f:
+            csv_file = csv.reader(f)
+            for record in csv_file:
+                if int(record[1]) > int(config.get('spam.stopforumspam.threshold', 20)):
+                    ip = record[0]
+                    # int is the smallest memory representation of an IP addr
+                    ip_int = int(ipaddress.ip_address(unicode(ip)))
+                    self.packed_ips.add(ip_int)
+        # to get actual memory usage, use: from pympler.asizeof import asizeof
+        log.info('Read stopforumspam file; %s recs, probably %s bytes stored in memory', len(self.packed_ips),
+                 len(self.packed_ips) * getsizeof(iter(self.packed_ips).next()) * 2)
+
+    def check(self, text, artifact=None, user=None, content_type='comment', **kw):
+        ip = utils.ip_address(request)
+        ip_int = int(ipaddress.ip_address(unicode(ip)))
+        res = ip_int in self.packed_ips
+        self.record_result(res, artifact, user)
+        return res

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/tests/unit/spam/test_spam_filter.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/unit/spam/test_spam_filter.py b/Allura/allura/tests/unit/spam/test_spam_filter.py
index 9d603f8..d05fa97 100644
--- a/Allura/allura/tests/unit/spam/test_spam_filter.py
+++ b/Allura/allura/tests/unit/spam/test_spam_filter.py
@@ -23,7 +23,7 @@ import unittest
 from ming.odm import ThreadLocalORMSession
 from nose.tools import assert_equal
 
-from allura.lib.spam import SpamFilter
+from allura.lib.spam import SpamFilter, ChainedSpamFilter
 from allura import model as M
 from allura.model.artifact import SpamCheckResult
 from alluratest.controller import setup_basic_test
@@ -32,8 +32,19 @@ from forgewiki import model as WM
 
 class MockFilter(SpamFilter):
 
+    def __init__(self, config):
+        self.config = config
+
     def check(*args, **kw):
         raise Exception("test exception")
+
+
+class MockFilter2(SpamFilter):
+
+    def __init__(self, config):
+        self.config = config
+
+    def check(*args, **kw):
         return True
 
 
@@ -82,3 +93,20 @@ class TestSpamFilterFunctional(object):
         assert_equal(len(results), 1)
         assert_equal(results[0].result, True)
         assert_equal(results[0].user.username, 'test-user')
+
+
+class TestChainedSpamFilter(object):
+
+    def test(self):
+        config = {'spam.method': 'mock1 mock2', 'spam.settingA': 'bcd'}
+        entry_points = {'mock1': MockFilter, 'mock2': MockFilter2}
+        checker = SpamFilter.get(config, entry_points)
+        assert isinstance(checker, ChainedSpamFilter)
+        assert len(checker.filters) == 2, checker.filters
+        assert_equal(checker.filters[0].config, {'spam.method': 'mock1', 'spam.settingA': 'bcd'})
+        assert_equal(checker.filters[1].config, {'spam.method': 'mock2', 'spam.settingA': 'bcd'})
+
+        assert checker.check()  # True because first filter errors out, and 2nd returns True
+
+        checker.submit_spam('some text')
+        checker.submit_ham('some text')

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/allura/tests/unit/spam/test_stopforumspam.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/unit/spam/test_stopforumspam.py b/Allura/allura/tests/unit/spam/test_stopforumspam.py
new file mode 100644
index 0000000..64963cf
--- /dev/null
+++ b/Allura/allura/tests/unit/spam/test_stopforumspam.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+import tempfile
+
+import mock
+
+from bson import ObjectId
+from nose.tools import assert_equal
+
+from allura.lib.spam.stopforumspamfilter import StopForumSpamSpamFilter
+
+
+class TestStopForumSpam(object):
+
+    def setUp(self):
+        self.content = u'spåm text'
+
+        self.artifact = mock.Mock()
+        self.artifact.project_id = ObjectId()
+        self.artifact.ref = None
+
+        with tempfile.NamedTemporaryFile() as f:
+            f.write('''"1.1.1.1","2","2018-01-22 10:56:29"
+"1.2.3.4","42","2017-09-24 18:33:00"
+"4.3.2.1","1","2017-09-28 14:03:53"''')
+            f.flush()
+            self.sfs = StopForumSpamSpamFilter({'spam.stopforumspam.ip_addr_file': f.name})
+
+    @mock.patch('allura.lib.spam.stopforumspamfilter.request')
+    def test_check(self, request):
+        request.remote_addr = '1.2.3.4'
+        assert_equal(True, self.sfs.check(self.content, artifact=self.artifact))
+
+        request.remote_addr = '1.1.1.1'
+        assert_equal(False, self.sfs.check(self.content, artifact=self.artifact))

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/development.ini
----------------------------------------------------------------------
diff --git a/Allura/development.ini b/Allura/development.ini
index 4a7b9d9..d3f2700 100644
--- a/Allura/development.ini
+++ b/Allura/development.ini
@@ -237,13 +237,16 @@ user_prefs.maximum_claimed_emails = 20
 site_admin_project = allura
 site_admin_project_nbhd = Projects
 
-; Spam filtering service: mollom or akismet
+; Spam filtering service: this can be one or more of: mollom akismet stopforumspam
 ;spam.method = akismet
 ; for akismet:
 ;spam.key =
 ; for mollom:
 ;spam.public_key =
 ;spam.private_key =
+; for stopforumspam, should be a listed_ip_*_all.txt file
+;spam.stopforumspam.ip_addr_file =
+;spam.stopforumspam.threshold = 20
 
 ; Phone verification service: Nexmo Verify
 ; phone.method = nexmo

http://git-wip-us.apache.org/repos/asf/allura/blob/407559b0/Allura/setup.py
----------------------------------------------------------------------
diff --git a/Allura/setup.py b/Allura/setup.py
index 3005d07..30bca7d 100644
--- a/Allura/setup.py
+++ b/Allura/setup.py
@@ -106,6 +106,7 @@ setup(
     [allura.spam]
     akismet = allura.lib.spam.akismetfilter:AkismetSpamFilter
     mollom = allura.lib.spam.mollomfilter:MollomSpamFilter
+    stopforumspam = allura.lib.spam.stopforumspamfilter:StopForumSpamSpamFilter
 
     [allura.phone]
     nexmo = allura.lib.phone.nexmo:NexmoPhoneService

Reply via email to