JAMES-2340 Run SpamAssassin in docker on top of Postgres

Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/b998c587
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/b998c587
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/b998c587

Branch: refs/heads/master
Commit: b998c587e0d11ff4d60b5ced86b987f89589bb3e
Parents: 787635c
Author: Antoine Duprat <adup...@linagora.com>
Authored: Thu Mar 15 16:43:24 2018 +0100
Committer: benwa <btell...@linagora.com>
Committed: Tue Mar 27 15:17:37 2018 +0700

----------------------------------------------------------------------
 .../james/util/scanner/SpamAssassinInvoker.java |   2 +-
 .../util/scanner/SpamAssassinExtension.java     |   3 +-
 .../resources/docker/spamassassin/Dockerfile    |  23 +++-
 .../resources/docker/spamassassin/bayes_pg.sql  | 119 +++++++++++++++++++
 .../test/resources/docker/spamassassin/local.cf |   7 +-
 .../test/resources/docker/spamassassin/spamd.sh |   4 +
 6 files changed, 150 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/main/java/org/apache/james/util/scanner/SpamAssassinInvoker.java
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/main/java/org/apache/james/util/scanner/SpamAssassinInvoker.java
 
b/server/container/util/src/main/java/org/apache/james/util/scanner/SpamAssassinInvoker.java
index fdc3560..69047bc 100644
--- 
a/server/container/util/src/main/java/org/apache/james/util/scanner/SpamAssassinInvoker.java
+++ 
b/server/container/util/src/main/java/org/apache/james/util/scanner/SpamAssassinInvoker.java
@@ -239,6 +239,6 @@ public class SpamAssassinInvoker {
     }
 
     private boolean hasBeenSet(String line) {
-        return line.startsWith("DidSet: local");
+        return line.startsWith("DidSet: ");
     }
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/test/java/org/apache/james/util/scanner/SpamAssassinExtension.java
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/test/java/org/apache/james/util/scanner/SpamAssassinExtension.java
 
b/server/container/util/src/test/java/org/apache/james/util/scanner/SpamAssassinExtension.java
index 1564b72..68bbfe4 100644
--- 
a/server/container/util/src/test/java/org/apache/james/util/scanner/SpamAssassinExtension.java
+++ 
b/server/container/util/src/test/java/org/apache/james/util/scanner/SpamAssassinExtension.java
@@ -50,7 +50,8 @@ public class SpamAssassinExtension implements 
BeforeEachCallback, AfterEachCallb
                 .withFileFromClasspath("local.cf", 
"docker/spamassassin/local.cf")
                 .withFileFromClasspath("run.sh", "docker/spamassassin/run.sh")
                 .withFileFromClasspath("spamd.sh", 
"docker/spamassassin/spamd.sh")
-                .withFileFromClasspath("rule-update.sh", 
"docker/spamassassin/rule-update.sh"));
+                .withFileFromClasspath("rule-update.sh", 
"docker/spamassassin/rule-update.sh")
+                .withFileFromClasspath("bayes_pg.sql", 
"docker/spamassassin/bayes_pg.sql"));
         spamAssassinContainer.waitingFor(new SpamAssassinWaitStrategy());
     }
 

http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/test/resources/docker/spamassassin/Dockerfile
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/test/resources/docker/spamassassin/Dockerfile 
b/server/container/util/src/test/resources/docker/spamassassin/Dockerfile
index 4a5b24d..ad1187a 100644
--- a/server/container/util/src/test/resources/docker/spamassassin/Dockerfile
+++ b/server/container/util/src/test/resources/docker/spamassassin/Dockerfile
@@ -1,4 +1,4 @@
-FROM debian:stretch
+FROM postgres:10.3
 
 ENV SPAMASSASSIN_VERSION 3.4.1
 
@@ -9,12 +9,24 @@ RUN apt-get update && \
         libmail-dkim-perl \
         libnet-ident-perl \
         libsocket-getaddrinfo-perl \
+        pyzor \
+        razor \
+        libdbi-perl \
+        libdbd-pg-perl \
         spamassassin=${SPAMASSASSIN_VERSION}* && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN mkdir /etc/mail/spamassassin/bayes_db && \
-    chmod -R 777 /etc/mail/spamassassin/bayes_db
+RUN mkdir -p /etc/spamassassin/sa-update-keys && \
+    chmod 700 /etc/spamassassin/sa-update-keys && \
+    chown debian-spamd:debian-spamd /etc/spamassassin/sa-update-keys && \
+    mkdir -p /var/lib/spamassassin/.pyzor && \
+    chmod 700 /var/lib/spamassassin/.pyzor && \
+    echo "public.pyzor.org:24441" > /var/lib/spamassassin/.pyzor/servers && \
+    chmod 600 /var/lib/spamassassin/.pyzor/servers && \
+    chown -R debian-spamd:debian-spamd /var/lib/spamassassin/.pyzor
+
+RUN sed -i 's/^logfile = .*$/logfile = \/dev\/stderr/g' 
/etc/razor/razor-agent.conf
 
 COPY spamd.sh /
 COPY rule-update.sh /
@@ -23,6 +35,9 @@ RUN chmod 755 /spamd.sh /rule-update.sh /run.sh
 
 COPY local.cf /etc/spamassassin/
 
+# Bayes database will be created automatically by Postgres
+COPY bayes_pg.sql /docker-entrypoint-initdb.d/
+
 EXPOSE 783
 
-ENTRYPOINT /spamd.sh
+ENTRYPOINT ["/spamd.sh"]

http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/test/resources/docker/spamassassin/bayes_pg.sql
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/test/resources/docker/spamassassin/bayes_pg.sql 
b/server/container/util/src/test/resources/docker/spamassassin/bayes_pg.sql
new file mode 100644
index 0000000..ef96472
--- /dev/null
+++ b/server/container/util/src/test/resources/docker/spamassassin/bayes_pg.sql
@@ -0,0 +1,119 @@
+
+CREATE TABLE bayes_expire (
+  id integer NOT NULL default '0',
+  runtime integer NOT NULL default '0'
+) WITHOUT OIDS;
+
+CREATE INDEX bayes_expire_idx1 ON bayes_expire (id);
+
+CREATE TABLE bayes_global_vars (
+  variable varchar(30) NOT NULL default '',
+  value varchar(200) NOT NULL default '',
+  PRIMARY KEY  (variable)
+) WITHOUT OIDS;
+
+INSERT INTO bayes_global_vars VALUES ('VERSION','3');
+
+CREATE TABLE bayes_seen (
+  id integer NOT NULL default '0',
+  msgid varchar(200) NOT NULL default '',
+  flag character(1) NOT NULL default '',
+  PRIMARY KEY  (id,msgid)
+) WITHOUT OIDS;
+
+CREATE TABLE bayes_token (
+  id integer NOT NULL default '0',
+  token bytea NOT NULL default '',
+  spam_count integer NOT NULL default '0',
+  ham_count integer NOT NULL default '0',
+  atime integer NOT NULL default '0',
+  PRIMARY KEY  (id,token)
+) WITHOUT OIDS;
+
+CREATE INDEX bayes_token_idx1 ON bayes_token (token);
+
+ALTER TABLE bayes_token SET (fillfactor=95);
+
+CREATE TABLE bayes_vars (
+  id serial NOT NULL,
+  username varchar(200) NOT NULL default '',
+  spam_count integer NOT NULL default '0',
+  ham_count integer NOT NULL default '0',
+  token_count integer NOT NULL default '0',
+  last_expire integer NOT NULL default '0',
+  last_atime_delta integer NOT NULL default '0',
+  last_expire_reduce integer NOT NULL default '0',
+  oldest_token_age integer NOT NULL default '2147483647',
+  newest_token_age integer NOT NULL default '0',
+  PRIMARY KEY  (id)
+) WITHOUT OIDS;
+
+CREATE UNIQUE INDEX bayes_vars_idx1 ON bayes_vars (username);
+
+CREATE OR REPLACE FUNCTION greatest_int (integer, integer)
+ RETURNS INTEGER
+ IMMUTABLE STRICT
+ AS 'SELECT CASE WHEN $1 < $2 THEN $2 ELSE $1 END;'
+ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION least_int (integer, integer)
+ RETURNS INTEGER
+ IMMUTABLE STRICT
+ AS 'SELECT CASE WHEN $1 < $2 THEN $1 ELSE $2 END;'
+ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION put_tokens(INTEGER,
+                                      BYTEA[],
+                                      INTEGER,
+                                      INTEGER,
+                                      INTEGER)
+RETURNS VOID AS ' 
+DECLARE
+  inuserid      ALIAS FOR $1;
+  intokenary    ALIAS FOR $2;
+  inspam_count  ALIAS FOR $3;
+  inham_count   ALIAS FOR $4;
+  inatime       ALIAS FOR $5;
+  _token BYTEA;
+  new_tokens INTEGER := 0;
+BEGIN
+  for i in array_lower(intokenary, 1) .. array_upper(intokenary, 1)
+  LOOP
+    _token := intokenary[i];
+    UPDATE bayes_token
+       SET spam_count = greatest_int(spam_count + inspam_count, 0),
+           ham_count = greatest_int(ham_count + inham_count, 0),
+           atime = greatest_int(atime, inatime)
+     WHERE id = inuserid 
+       AND token = _token;
+    IF NOT FOUND THEN 
+      -- we do not insert negative counts; such tokens are simply skipped
+      IF NOT (inspam_count < 0 OR inham_count < 0) THEN
+        INSERT INTO bayes_token (id, token, spam_count, ham_count, atime) 
+        VALUES (inuserid, _token, inspam_count, inham_count, inatime); 
+        IF FOUND THEN
+          new_tokens := new_tokens + 1;
+        END IF;
+      END IF;
+    END IF;
+  END LOOP;
+
+  IF new_tokens > 0 AND inatime > 0 THEN
+    UPDATE bayes_vars
+       SET token_count = token_count + new_tokens,
+           newest_token_age = greatest_int(newest_token_age, inatime),
+           oldest_token_age = least_int(oldest_token_age, inatime)
+     WHERE id = inuserid;
+  ELSIF new_tokens > 0 AND NOT inatime > 0 THEN
+    UPDATE bayes_vars
+       SET token_count = token_count + new_tokens
+     WHERE id = inuserid;
+  ELSIF NOT new_tokens > 0 AND inatime > 0 THEN
+    UPDATE bayes_vars
+       SET newest_token_age = greatest_int(newest_token_age, inatime),
+           oldest_token_age = least_int(oldest_token_age, inatime)
+     WHERE id = inuserid;
+  END IF;
+  RETURN;
+END; 
+' LANGUAGE 'plpgsql'; 

http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/test/resources/docker/spamassassin/local.cf
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/test/resources/docker/spamassassin/local.cf 
b/server/container/util/src/test/resources/docker/spamassassin/local.cf
index 6263270..6005e55 100644
--- a/server/container/util/src/test/resources/docker/spamassassin/local.cf
+++ b/server/container/util/src/test/resources/docker/spamassassin/local.cf
@@ -43,8 +43,11 @@ use_bayes 1
 #
 #bayes_auto_learn 1
 
-bayes_path /etc/mail/spamassassin/bayes_db/bayes
-bayes_file_mode 0777
+bayes_store_module Mail::SpamAssassin::BayesStore::PgSQL
+
+bayes_sql_dsn DBI:Pg:dbname=postgres;host=localhost
+bayes_sql_username postgres
+
 bayes_min_spam_num 1
 bayes_min_ham_num 1
 

http://git-wip-us.apache.org/repos/asf/james-project/blob/b998c587/server/container/util/src/test/resources/docker/spamassassin/spamd.sh
----------------------------------------------------------------------
diff --git 
a/server/container/util/src/test/resources/docker/spamassassin/spamd.sh 
b/server/container/util/src/test/resources/docker/spamassassin/spamd.sh
index 251e981..2093cf8 100755
--- a/server/container/util/src/test/resources/docker/spamassassin/spamd.sh
+++ b/server/container/util/src/test/resources/docker/spamassassin/spamd.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+echo "Run Postgres"
+/usr/local/bin/docker-entrypoint.sh postgres &
+
+echo "Run spamd"
 spamd --username debian-spamd \
       --nouser-config \
       --syslog stderr \


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org
For additional commands, e-mail: server-dev-h...@james.apache.org

Reply via email to