On 30.10.2018 20:46, Chris Narkiewicz wrote:
W dniu 30/10/2018 o 19:31, Peter N. M. Hansteen pisze:
yes, a well-known problem, and it's what nospamd (hinted at in the spamd
man pages) is for.

To some extent it helps to whitelist IP addresses and networks that
domains list in their SPF info.

Yeah, I hoped there are some reputable sources of validated mail
sources based on SPF and DKIM.

I'll give a try to your compiled list, but the fact you maintain
it manually is a bit discouraging.
I ran into this problem as well.
I ended up writing a script that looks up the SPF entries for the hosts in the greylist and, if they look reasonable, whitelists those ranges and removes the greylist entries. It runs every 15 minutes.

This works with the following pf rules:
pass in quick on $extIf proto tcp from <spfwhite> to $pubIp port smtp \
    rdr-to $mailsrv
pass in quick on $extIf proto tcp from !<spamd-white> to $pubIp port smtp \
    rdr-to 127.0.0.1 port $spamdPort

The trapping function, which kicks in when mail goes to the wrong recipient, works for me but probably does not scale. The spamdb -Gd calls to remove the greylist entries are something I patched into spamdb, but it seems that functionality has somehow made it into the regular binary.

The script is fairly well debugged and has run for me for over a year with good results, but it seriously lacks tests of any kind.
Your mileage may vary.

--
Mit freundlichen Grüßen/Best regards

Mario Theodoridis

#!/usr/bin/env python2.7
import subprocess, traceback, os, re, sys, time
import dns.resolver, dns.name, dns.exception
import socket,struct

def doLog(msg, caller=2):
    """Append a timestamped line for `msg` to /var/log/scanSpam.log.

    msg    -- text to record
    caller -- which stack entry, counted from the end of the stack, the
              line is attributed to.  The last entry is doLog itself, so
              the default of 2 selects doLog's direct caller.

    The written line looks like
        <YYYY-mm-dd HH:MM:SS> <msg> STACK[<trail><lineno>]
    where <trail> is built from every stack entry above the selected one
    and <lineno> is the line number of the selected entry.
    """
    debugLog = '/var/log/scanSpam.log'
    stk = traceback.extract_stack()
    orig = ''
    for entry in stk[:max(len(stk) - caller, 0)]:
        # entry[3] is the text of the executing line; it is None when the
        # source is not available.
        if entry[3] is None:
            orig += '__main__:'
        else:
            orig += entry[3] + ':'
    x = stk[-caller][1]
    out = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + msg \
        + ' STACK[' + orig + str(x) + ']\n'
    # "with" closes the log even when the write itself fails.
    with open(debugLog, 'a') as wh:
        wh.write(out)
    

def run(command, caller=3):
    """run(command) -> (returncode, stdout, stderr)

    Runs the given command in the shell, logs the command together with
    its return code and stripped output via doLog, and returns the return
    code plus the unmodified stdout and stderr.

    command -- shell command line
    caller  -- passed through to doLog; the default of 3 attributes the log
               line to the caller of run() instead of to run() itself.

    NOTE: the command is interpreted by the shell (shell=True), so callers
    must quote anything that did not originate in this script.
    """
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    # Every logged field is wrapped in [...] so empty values stay visible.
    doLog("COM:[" + command + "]   RC:[" + str(proc.returncode) + "]  OUT:["
          + out.strip() + "]  ERR:[" + err.strip() + "]", caller)
    return (proc.returncode, out, err)

def makeMask(n):
    """Return an integer with the n least-significant bits set.

    n -- number of bits, 0 or greater; makeMask(0) is 0.

    Raises ValueError for a negative n (negative shift count).
    """
    return (1 << n) - 1

def dottedQuadToNum(ip):
    """Return the dotted-quad IPv4 string `ip` as an unsigned integer.

    The four bytes produced by socket.inet_aton are decoded with struct
    format 'I', i.e. in the byte order of the machine running the script.
    Whatever socket.inet_aton raises for a malformed address propagates.
    """
    packed = socket.inet_aton(ip)
    (number,) = struct.unpack('I', packed)
    return number
        
def isValid(ipnet):
    """Return True if `ipnet` is exactly "a.b.c.d" or "a.b.c.d/n".

    Each octet must be 0-255 and the optional prefix length n must be a
    decimal number from 0 to 32.  Empty or whitespace-only input is
    rejected silently; any other rejected value is printed and logged.

    The check is deliberately strict: accepted values are later used in bit
    shifts and pasted into pfctl command lines, so stray characters or
    out-of-range prefixes must not get through.
    """
    if ipnet.strip() == '':
        return False
    ok = False
    m = re.match(r'^([0-9]{1,3}(?:\.[0-9]{1,3}){3})(?:/([0-9]{1,2}))?\Z',
                 ipnet)
    if m and (m.group(2) is None or int(m.group(2)) <= 32):
        try:
            # inet_aton rejects octets above 255
            socket.inet_aton(m.group(1))
            ok = True
        except socket.error:
            pass
    if ok:
        return True
    msg = "invalid spec [{0:s}]".format(ipnet)
    print(msg)
    doLog(msg)
    return False
    
def networkMask(ipnet):
    """Return the address of `ipnet` ANDed with a mask of its prefix length.

    `ipnet` is split by getIpNetMask; the address goes through
    dottedQuadToNum and the prefix length through makeMask, and the two
    results are combined with a bitwise AND.

    NOTE(review): makeMask yields low-order bits and dottedQuadToNum decodes
    with struct format 'I', so which address bits survive presumably depends
    on the host byte order -- confirm before relying on the result.
    """
    (address, prefixLen) = getIpNetMask(ipnet)
    number = dottedQuadToNum(address)
    lowBits = makeMask(prefixLen)
    return number & lowBits

def getIpNetMask(ipnet):
    """Split "address/prefix" into an (address, prefix length) tuple.

    The address is everything before the first '/'.  The prefix length is
    the integer value of the text between the first and second '/'; it
    falls back to 32 when there is no '/' or that text is not an integer.
    """
    pieces = ipnet.split('/')
    prefix = 32
    if len(pieces) > 1:
        try:
            prefix = int(pieces[1])
        except ValueError:
            # unparsable prefix: keep the /32 fallback
            pass
    return (pieces[0], prefix)
    
def addressInNet(ip, net_n_bits):
    """Return True if IPv4 address `ip` lies inside network `net_n_bits`.

    ip         -- dotted-quad address
    net_n_bits -- "a.b.c.d" or "a.b.c.d/n", split by getIpNetMask

    A prefix length outside 0-32 never matches: without this guard a
    prefix above 32 raised ValueError (negative shift) and a negative
    prefix produced a mask that matched every address.
    Malformed addresses raise whatever socket.inet_aton raises.
    """
    ipaddr = struct.unpack('>L', socket.inet_aton(ip))[0]
    net, bits = getIpNetMask(net_n_bits)
    if bits < 0 or bits > 32:
        return False
    netaddr = struct.unpack('>L', socket.inet_aton(net))[0]
    # the top `bits` bits set, the rest clear
    netmask = (1 << 32) - (1 << (32 - bits))
    return (ipaddr & netmask) == (netaddr & netmask)

def getIplist(dName, ipl, isRecursive=False):
    """Collect the addresses named by the SPF record of `dName` into `ipl`.

    dName       -- dns.name.Name to look up
    ipl         -- list that parseSpf appends address specs to
    isRecursive -- accepted for callers that pass it; not used here

    Uses two module globals: `hosts` remembers every domain already looked
    up so it is not queried twice, and `recursions` counts calls so that
    lookups stop after 50.  When the TXT query fails and the name has more
    than three labels, the parent domain is tried instead.
    """
    global recursions, hosts
    domain = dName.to_text()
    if domain in hosts:
        doLog("Ignoring duplicate domain {0:s}".format(domain))
        return

    hosts[domain] = True
    recursions += 1
    if recursions > 50:
        doLog("Over {0:d} recursions, quitting".format(recursions))
        return
    try:
        answers = dns.resolver.query(dName, 'TXT')
    except dns.exception.DNSException:
        if len(dName.labels) > 3:
            getIplist(dName.parent(), ipl)
        return
    for data in answers:
        # A TXT record may be delivered as several character-strings; SPF
        # treats them as one string concatenated without separators.
        txt = ''.join(data.strings)
        doLog("recursion {0:d} queried [{1:s}]".format(recursions, txt))
        f = txt.split(' ')
        # The version tag must be exactly "v=spf1" ("v=spf10" is not SPF).
        if f[0].strip().lower() == 'v=spf1':
            parseSpf(f[1:], ipl, dName)
    
def getARecord(dName, ipl, subnet=''):
    """Append every A-record address of `dName`, plus `subnet`, to `ipl`.

    dName  -- name handed to dns.resolver.query
    ipl    -- list receiving one "<address><subnet>" string per answer
    subnet -- suffix such as "/24" added to each address; empty by default

    A failed lookup (dns.exception.DNSException) leaves `ipl` untouched.
    """
    try:
        records = dns.resolver.query(dName, 'A')
    except dns.exception.DNSException:
        return
    ipl.extend(rr.address + subnet for rr in records)
    
def getMxRecord(dName, ipl, subnet=''):
    """Append the addresses of the mail exchangers of `dName` to `ipl`.

    dName  -- name handed to dns.resolver.query
    ipl    -- list receiving "<address><subnet>" strings
    subnet -- suffix such as "/24" added to each address; empty by default

    An exchanger that is itself made up only of digits and dots is taken as
    a literal address; every other exchanger is resolved via getARecord.
    A failed MX lookup (dns.exception.DNSException) leaves `ipl` untouched.
    """
    try:
        answers = dns.resolver.query(dName, 'MX')
    except dns.exception.DNSException:
        return
    for data in answers:
        mx = data.exchange.to_text()
        if re.match(r'^[\d\.]{7,15}$', mx):
            # to_text() ends absolute names with the root dot, which must
            # not become part of the address ("1.2.3.4." is not valid).
            ipl.append(mx.rstrip('.') + subnet)
            continue
        getARecord(mx, ipl, subnet)
    
def parseSpf(fields, ipl, dName):
    """Evaluate the terms of one SPF record, collecting addresses in `ipl`.

    fields -- the space-separated terms following the version tag
    ipl    -- list that address specs are appended to
    dName  -- domain the record belongs to; used for bare "a" / "mx" terms

    Handled terms:
      a, mx, a/NN, mx/NN       -- resolved against dName
      a:domain[/NN], mx:...    -- resolved against the named domain
      redirect=domain          -- followed via getIplist
      include:domain           -- followed via getIplist
      ip4:spec                 -- appended when isValid accepts it
    Every other term is ignored.
    """
    for fld in fields:
        doLog('parsing [{0:s}]'.format(fld))
        kv = fld.split(':')
        key = kv[0].strip()

        m = re.search(r'^(a|mx)(/|:|$)', key)
        if m:
            getter = getARecord if m.group(1) == 'a' else getMxRecord
            cdr = key.split('/')
            if len(cdr) == 2:
                # a/24
                getter(dName, ipl, '/' + cdr[1])
            elif len(kv) == 1:
                # a
                getter(dName, ipl)
            else:
                # a:domain.com/24
                val = kv[1].strip()
                m = re.search(r'^([^/]+)(/\d+)?$', val)
                if m:
                    domain = m.group(1)
                    subnet = m.group(2)
                    if subnet is None:
                        subnet = ''
                    getter(dns.name.from_text(domain), ipl, subnet)
            continue

        m = re.search(r'^redirect=(.+)$', key)
        if m:
            # redirect=
            getIplist(dns.name.from_text(m.group(1)), ipl, True)
            continue

        # everything below needs exactly one "key:value" pair
        if len(kv) != 2:
            continue

        val = kv[1].strip()
        if key == 'include':
            getIplist(dns.name.from_text(val), ipl, True)
        elif key == 'ip4':
            if isValid(val):
                ipl.append(val)

def processSpf(ip, ipl, record):
    """Filter the SPF list `ipl` and report whether `ip` is covered by it.

    ip     -- address of the sending host
    ipl    -- list of "a.b.c.d[/n]" specs; rewritten in place so that only
              the accepted entries remain, in their original order
    record -- description of the greylist entry, used in messages only

    Entries rejected by isValid are dropped.  Entries wider than /16 that
    do not start with "172.16" or "10." are dropped as well unless that
    exact spec is already listed in the pf table "mywhite".

    Returns True when `ip` lies inside at least one accepted entry.
    """
    isAllowed = False
    accepted = []
    known = None    # contents of the mywhite table, fetched on first use
    # Walk back to front to keep the original order of the messages.
    for thisip in reversed(ipl):
        doLog("process:" + thisip)
        if not isValid(thisip):
            msg = "skipping garbage SPF entry {0:s} for {1:s}" \
                .format(thisip, record)
            print(msg)
            doLog(msg)
            continue
        # make sure someone doesn't white list the entire internet
        # 0.0.0.0/1, 130.0.0.0/1
        (ip1, mask1) = getIpNetMask(thisip)
        if mask1 < 16 and not ip1.startswith(('172.16', '10.')):
            # first see if we whitelisted this before
            if known is None:
                (rc, out, err) = run("pfctl -t mywhite -T show")
                known = set(line.strip() for line in out.splitlines())
            # Compare whole entries: the text comes from somebody else's
            # DNS, so it must neither reach the shell nor match as a mere
            # substring ("0.0.0.0/1" is contained in "10.0.0.0/16").
            if thisip not in known:
                msg = "QUESTIONABLE SPF entry {0:s} for {1:s}" \
                    .format(thisip, record)
                print(msg)
                doLog(msg)
                # not considering this entry for now
                continue
        accepted.append(thisip)
        if addressInNet(ip, thisip):
            msg = "IP {0:s} matches {1:s}".format(ip, thisip)
            print(msg)
            doLog(msg)
            isAllowed = True
            # we continue in case we get a /1 or so
    accepted.reverse()
    ipl[:] = accepted
    return isAllowed
    
def main():
    """Process every GREY entry reported by spamdb.

    For each entry:
      * a recipient other than the accepted local addresses gets the
        sending IP trapped (spamdb -ta);
      * otherwise the SPF data of the HELO host is collected; if the sender
        IP is covered, the SPF ranges are added to the pf table "spfwhite"
        and the greylist entry is deleted (spamdb -Gd); if SPF data exists
        but does not cover the sender, the IP is trapped;
      * hosts without usable SPF data are left alone.

    spamdb is called with at most maxArgs keys per invocation.  When the
    table changed, its contents are saved to /var/db/spfwhite.txt.
    Exits with status 1 if spamdb fails with an error message, and with
    status 0 if it simply reports no GREY entries.
    """
    global recursions, maxArgs
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote     # Python 2

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    (ec, out, err) = run('spamdb | grep GREY')
    if ec:
        if err != '':
            print("Failed running spamdb: {0:s}".format(err))
            sys.exit(1)
        sys.exit(0)

    dirty = False
    greys = []
    blacks = []
    i = 0
    for line in out.split("\n"):
        line = line.strip()
        if line == '':
            continue
        fields = line.split("|")
        if len(fields) < 5:
            doLog("skipping malformed spamdb line [{0:s}]".format(line))
            continue
        (ip, host, sender, recip) = fields[1:5]
        record = '{0:s}\t{1:s}\t{2:s}\t"{3:s}"'.format(recip, ip, host, sender)
        m = re.match(r'^<(abuse|mario(-[^@]+)?)@schmut\.com>$', recip)
        if not m:
            # spammer
            doLog("trapping {0:s} {1:s}".format(ip, record))
            blacks.append(quote(ip))
        else:
            keep = "keeping {0:s}".format(record)
            ipl = []
            recursions = 0
            # NOTE(review): `hosts` is not reset here, so getIplist skips a
            # HELO host that an earlier entry of this run already looked up
            # -- confirm that this is intended.
            try:
                getIplist(dns.name.from_text(host), ipl)
            except dns.exception.DNSException as e:
                # HELO names and SPF contents are chosen by the remote
                # side; an unparsable name must not abort the whole run.
                doLog("SPF lookup failed for [{0:s}]: {1:s}"
                      .format(host, str(e)))
                ipl = []
            if len(ipl) > 0:
                if processSpf(ip, ipl, record):
                    print(keep)
                    doLog(keep)
                    print("adding spf entries of {0:s}".format(host))
                    run("pfctl -t spfwhite -T add {0:s}"
                        .format(' '.join(quote(spec) for spec in ipl)))
                    for thisip in ipl:
                        print(thisip)
                    dirty = True
                    # host, sender and recipient come straight from the
                    # SMTP dialogue, so the key has to be shell-quoted.
                    greys.append(quote("{0:s}|{1:s}|{2:s}|{3:s}"
                                       .format(ip, host, sender, recip)))
                else:
                    # bl th bastard
                    doLog("trapping {0:s} {1:s} non SPF".format(ip, record))
                    blacks.append(quote(ip))
            else:
                print(keep)
                doLog(keep)

        # flush in batches to keep the command lines short
        if len(blacks) >= maxArgs:
            run("spamdb -ta {0:s}".format(' '.join(blacks)))
            blacks = []
        if len(greys) >= maxArgs:
            run("spamdb -Gd {0:s}".format(' '.join(greys)))
            greys = []
        i += 1

    # whatever is left over; spamdb is not called without keys
    if blacks:
        run("spamdb -ta {0:s}".format(' '.join(blacks)))
    if greys:
        run("spamdb -Gd {0:s}".format(' '.join(greys)))

    if dirty:
        run("pfctl -t spfwhite -T show > /var/db/spfwhite.txt")
    print("Processed {0:d} entries\n".format(i))
    
# Upper bound on the number of keys main() passes to one spamdb call.
maxArgs = 50
# Number of getIplist() calls so far; main() sets it back to 0 before
# each greylist entry and getIplist() stops once it exceeds 50.
recursions = 0
# Domains getIplist() has already looked up (domain text -> True).
hosts = {}
# Script entry point: the whole run happens here, at module level.
main()
# NOTE(review): assigned only after main() has returned and not read by
# anything visible in this file -- looks like a leftover.
ipl = []
Index: usr.sbin/spamdb/spamdb.c
===================================================================
RCS file: /cvs/src/usr.sbin/spamdb/spamdb.c,v
retrieving revision 1.30
diff -u -p -r1.30 spamdb.c
--- usr.sbin/spamdb/spamdb.c	11 Nov 2015 16:10:21 -0000	1.30
+++ usr.sbin/spamdb/spamdb.c	12 Nov 2017 16:56:05 -0000
@@ -264,7 +264,7 @@ extern char *__progname;
 static int
 usage(void)
 {
-	fprintf(stderr, "usage: %s [[-Tt] -a keys] [[-Tt] -d keys]\n", __progname);
+	fprintf(stderr, "usage: %s [[-Tt] -a keys] [[-TtG] -d keys]\n", __progname);
 	exit(1);
 	/* NOTREACHED */
 }
@@ -272,11 +272,11 @@ usage(void)
 int
 main(int argc, char **argv)
 {
-	int i, ch, action = 0, type = WHITE, r = 0, c = 0;
+	int i, ch, action = 0, type = WHITE, r = 0, c = 0, grey = 0;
 	HASHINFO	hashinfo;
 	DB		*db;
 
-	while ((ch = getopt(argc, argv, "adtT")) != -1) {
+	while ((ch = getopt(argc, argv, "adtTG")) != -1) {
 		switch (ch) {
 		case 'a':
 			action = 1;
@@ -290,6 +290,9 @@ main(int argc, char **argv)
 		case 'T':
 			type = SPAMTRAP;
 			break;
+		case 'G':
+			grey = 1;
+			break;
 		default:
 			usage();
 			break;
@@ -327,6 +330,13 @@ main(int argc, char **argv)
 		for (i=0; i<argc; i++)
 			if (argv[i][0] != '\0') {
 				c++;
+				if (grey) {
+					char *p = argv[i];
+					while(*p) {
+						if (*p == '|') *p = '\n';
+						p++;
+					}
+				}
 				r += dbupdate(db, argv[i], 0, type);
 			}
 		if (c == 0)

Reply via email to