Daniel Quinlan wrote:
> Any other ideas?

I've got a plugin (attached) that tracks the occurrence of non-blacklisted domains in messages, storing the data in an SQL database and emailing the data out periodically. The plugin expires it's database automatically.

The emails generated could be used to calculate the domains most often seen.

Two tables, described in the attached TABLES file, are needed.


Daryl
=head1 NAME

Miail::SpamAssassin::Plugin::Reporting

=head1 SYNOPSIS

  loadplugin Mail::SpamAssassin::Plugini::Reporting [/path/to/Reporting.pm]

  rbl_statistics_report ( 0 | 1 )       (default: 0)
      Whether to report statistics used in generating SURBL whitelists.
      It requires that you setup the SQL tables described below.

  rbl_statistics_dsn DBI:databasetype:databasename:hostname:port
      Your SQL DSN.
      Example: "DBI:mysql:spamassassin:localhost"

  rbl_statistics_username
      Your SQL username.

  rbl_statistics_password
      Your SQL password.

  rbl_statistics_report_interval
      The report interval.

  rbl_statistics_to_address
      The email address to send the reports to.

  rbl_statistics_from_address
      The email address to send the reports from.

  rbl_statistics_expire_to      (default: 250)
      The number of domains left in your database after an expiry.

  rbl_statistics_expire_at      (default: 500)
      The number of domains at which the plugin will expire old entries
      in your database.  This must be set to at least twice the number
      set in rbl_statistics_expire_to to avoid excessive expiry runs.

  rbl_statistics_report_number  (default: 200)
      The (top) number of domains to include in the emailed report.

=head1 DESCRIPTION

This plugin records data on the top domains appearing in messages processed
by SpamAssassin and stores it in an SQL database.  Periodically the plugin
will, if enabled, report the statistical data to the SpamAssassin developers
for use in generating RBL whitelists.

=head1 AUTHOR

Daryl C. W. O'Shea <[EMAIL PROTECTED]>

=head1 COPYRIGHT

Copyright (c) 2005 Daryl C. W. O'Shea.  All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0
 
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut


package Mail::SpamAssassin::Plugin::Reporting;

*dbg=\&Mail::SpamAssassin::dbg;

use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Conf::Parser;
use strict;
use warnings;
use bytes;

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);

use constant HAS_DBI => eval { require DBI; };
use constant HAS_NET_SMTP => eval { require Net::SMTP; };
use constant HAS_NET_DNS => eval { require Net::DNS; };

my $CONF_TYPE_STRING = 1;
my $CONF_TYPE_BOOL = 2;
my $CONF_TYPE_NUMERIC = 3;


# constructor: register the eval rule
sub new {
  my $class = shift;
  my $mailsaobject = shift;

  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  # 3.1 supports a better configuration method that 3.0 doesn't
  $self->set_config($mailsaobject->{conf}) if (Mail::SpamAssassin::Version() !~ 
/^3\.0/);

  $self->{_dbh} = undef;

  unless (HAS_DBI) {
    dbg("rbl reporter: Unable to connect to database: DBI module not available: 
$!");
  }
  unless (HAS_NET_DNS) {
    dbg("rbl reporter: required module, Net::DNS, not available: $!");
  }
  unless (HAS_NET_SMTP) {
    dbg("rbl reporter: required module, Net::SMTP, not available: $!");
  }

  return $self;
}


# parse config options
sub parse_config {
  # 3.1 supports a better configuration method
  return 0 if (Mail::SpamAssassin::Version() !~ /^3\.0/);

  my ($self, $opts) = @_;
  my $key = $opts->{key};

  if ($key eq 'rbl_statistics_report') {
    $opts->{conf}->{rbl_statistics_report} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_to_address') {
    $opts->{conf}->{rbl_statistics_to_address} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_from_address') {
    $opts->{conf}->{rbl_statistics_from_address} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_expire_to') {
    $opts->{conf}->{rbl_statistics_expire_to} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_expire_at') {
    $opts->{conf}->{rbl_statistics_expire_at} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_report_number') {
    $opts->{conf}->{rbl_statistics_report_number} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_dsn') {
    $opts->{conf}->{rbl_statistics_dsn} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_username') {
    $opts->{conf}->{rbl_statistics_username} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_password') {
    $opts->{conf}->{rbl_statistics_password} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  } elsif ($key eq 'rbl_statistics_report_interval') {
    $opts->{conf}->{rbl_statistics_report_interval} = $opts->{value};
    $self->inhibit_further_callbacks();
    return 1;
  }
  return 0;
}


# register config options
sub set_config {
  my ($self, $conf) = @_;  
  my @cmds = ();

  push (@cmds, {
    setting => 'rbl_statistics_report',
    is_admin => 1,
    default => 0,
    type => $CONF_TYPE_BOOL
  });

  push (@cmds, {
    setting => 'rbl_statistics_to_address',
    is_admin => 1,
    default => '[EMAIL PROTECTED]',
    type => $CONF_TYPE_STRING
  });

  push (@cmds, {
    setting => 'rbl_statistics_from_address',
    is_admin => 1,
    default => '',
    type => $CONF_TYPE_STRING
  });

  push (@cmds, {
    setting => 'rbl_statistics_expire_to',
    is_admin => 1,
    default => 250,
    type => $CONF_TYPE_NUMERIC
  });

  push (@cmds, {
    setting => 'rbl_statistics_from_expire_at',
    is_admin => 1,
    default => 500,
    type => $CONF_TYPE_NUMERIC
  });

  push (@cmds, {
    setting => 'rbl_statistics_report_number',
    is_admin => 1,
    default => 200,
    type => $CONF_TYPE_NUMERIC
  });

  push (@cmds, {
    setting => 'rbl_statistics_dsn',
    is_admin => 1,
    default => '',
    type => $CONF_TYPE_STRING
  });

  push (@cmds, {
    setting => 'rbl_statistics_username',
    is_admin => 1,
    default => '',
    type => $CONF_TYPE_STRING
  });

  push (@cmds, {
    setting => 'rbl_statistics_password',
    is_admin => 1,
    default => '',
    type => $CONF_TYPE_STRING
  });

  push (@cmds, {
    setting => 'rbl_statistics_report_interval',
    is_admin => 1,
    default => 604800,
    type => $CONF_TYPE_NUMERIC
  });
  
  $conf->{parser}->register_commands([EMAIL PROTECTED]);
}


sub check_end {
  my ($self, $scanner) = @_;

  unless ($scanner->{permsgstatus}->{conf}->{rbl_statistics_report}) {
    dbg("rbl reporter: check_end: not enabled, skipping");
    return 1;
  }

  my $pms = $scanner->{permsgstatus};
  my $conf = $scanner->{permsgstatus}->{conf};
  my $uribl_hits = $pms->{uribl_scanstate}->{hits};

  $self->{_dsn} = $conf->{rbl_statistics_dsn};
  $self->{_dbuser} = $conf->{rbl_statistics_username};
  $self->{_dbpass} = $conf->{rbl_statistics_password};
  $self->{_report_interval} = $conf->{rbl_statistics_report_interval};
  $self->{_to_address} = $conf->{rbl_statistics_to_address};
  $self->{_from_address} = $conf->{rbl_statistics_from_address};
  $self->{_expire_to} = $conf->{rbl_statistics_expire_to};
  $self->{_expire_at} = $conf->{rbl_statistics_expire_at};
  $self->{_report_number} = $conf->{rbl_statistics_report_number};

  unless (defined($self->{_dsn})) {
    dbg("rbl reporter: DSN not defined, aborting");
    return 0;
  }
  unless (defined($self->{_dbpass})) {
    dbg("rbl reporter: SQL username not defined, aborting");
    return 0;
  }
  unless (defined($self->{_dbuser})) {
    dbg("rbl reporter: SQL password not defined, aborting");
    return 0;
  }

  # we need to set the defaults here incase we're using 3.0
  $self->{_report_interval} ||= 604800;
  $self->{_to_address} ||= "[EMAIL PROTECTED]";
  $self->{_expire_to} ||= 250;
  $self->{_expire_at} ||= 500;
  $self->{_report_number} ||= 200;

  # prevent excessive expiries and the elimination of domains prematurely
  $self->{_expire_at} = 2 * $self->{_expire_to} if ($self->{_expire_at} < 2 * 
$self->{_expire_to});

  dbg("rbl reporter: $pms->{uri_domain_count} domains found in message");
  return unless ($pms->{uri_domain_count});
  my $non_blacklist_domain_count = $pms->{uri_domain_count};

  my %domains;
  for (@{$pms->{uri_list}}) {
    my $domain = Mail::SpamAssassin::Util::uri_to_domain($_);
    $domains{$domain} = 1 if $domain;
  }

  # remove blacklisted domains
  while (my($rulename) = each (%{$uribl_hits})) {
    while (my($domain) = each (%{$uribl_hits->{$rulename}})) {
      if (exists($domains{$domain})) {
        delete $domains{$domain};
        $non_blacklist_domain_count--;
        dbg("rbl reporter: domain: $domain was found in uridnsbl, not 
reporting");
      }
    }
  }
  return unless ($non_blacklist_domain_count);

  $self->_connect_db();

  while (my($domain) = each (%domains)) {
    $self->_put_domain($domain, $pms->is_spam);
    dbg("rbl reporter: processing domain: $domain");
  }

  $self->_do_db_expiry() if $self->_db_expiry_check();
  $self->_report() if $self->_report_check();

  $self->_disconnect_db();

  return 1;
}


# check to see if a domain is already in the database
sub _domain_in_db {
  my ($self, $domain) = @_;

  return (-1, 0) unless (defined($self->{_dbh}));

  my $sql = "SELECT id FROM rbl_reporting WHERE domain = ?";
  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: domain_in_db: SQL Error: ".$self->{_dbh}->errstr());
    return (-1, 0);
  }

  my $rc = $sth->execute($domain);

  unless ($rc) {
    dbg("rbl reporter: domain_in_db: SQL Error: ".$self->{_dbh}->errstr());
    return (-1, 0);
  }

  my ($id) = $sth->fetchrow_array();

  $sth->finish();

  $rc = 0 if ($rc eq '0E0');
  $id ||= 0;
  return ($rc, $id);
}


# insert or update a domain's statistics
sub _put_domain {
  my ($self, $domain, $is_spam) = @_;

  return 0 unless (defined($self->{_dbh}));
  return 0 unless (defined($is_spam));

  my ($existing_domain, $id) = $self->_domain_in_db($domain);
  return 0 if ($existing_domain == -1); # db error in _domain_in_db

  if ($existing_domain == 1 && $id == 0) {
    dbg("rbl reporter: DB logic error: domain exists but id is 0");
    return 0;
  }

  if (!$existing_domain) {

    my $sql = "INSERT INTO rbl_reporting
               (domain, spam_count, ham_count, ctime, mtime, rtime)
               VALUES (?,?,?,?,?,?)";

    my $sth = $self->{_dbh}->prepare_cached($sql);

    unless (defined($sth)) {
      dbg("rbl reporter: _put_domain: SQL Error: ".$self->{_dbh}->errstr());
      return 0;
    }

    my $time = time;
    my $rc = $sth->execute($domain, $is_spam, !$is_spam, $time, $time, $time);

    unless ($rc) {
      dbg("rbl reporter: _put_domain: SQL Error: ".$self->{_dbh}->errstr());
      return 0;
    }
    $sth->finish();

    $sql = "UPDATE rbl_reporting_vars
               SET value = value + 1
             WHERE variable = 'number_domains'";

    my $rows = $self->{_dbh}->do($sql);

    unless (defined($rows)) {
      dbg("rbl reporter: _put_domain: vars update: SQL Error: 
".$self->{_dbh}->errstr());
    }
    $rows = undef;

    $sql = "UPDATE rbl_reporting_vars
               SET value = ?
             WHERE variable = 'time_domain_last_added'";

    $rows = $self->{_dbh}->do($sql, undef, $time);

    unless (defined($rows)) {
      dbg("rbl reporter: _put domain: vars update: SQL Error: 
".$self->{_dbh}->errstr());
    }
  }
  else { # domain already exists, update it

    my $sql = "UPDATE rbl_reporting
                  SET spam_count = spam_count + ?,
                      ham_count = ham_count + ?,
                      mtime = ?
                WHERE id = ?";

    my $time = time;
    my $rows = $self->{_dbh}->do($sql, undef, $is_spam, !$is_spam, $time, $id);

    unless (defined($rows)) {
      dbg("rbl reporter: _put_domain: SQL Error: ".$self->{_dbh}->errstr());
      return 0;
    }
  }

  return 1;
}


# check to see if a database expiry is needed
sub _db_expiry_check {
  my ($self) = @_;

  my $sql = "SELECT value FROM rbl_reporting_vars 
              WHERE variable = 'number_domains'";

  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: _expire_db: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my $rc = $sth->execute();

  unless (defined($rc)) {
    dbg("rbl reporter: _expire_db: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my ($count) = $sth->fetchrow_array();
  $sth->finish();

  unless (defined($count)) {
    dbg("rbl reporter: _db_expiry_check: number_domains not defined!");
    return 0;
  }

  if ($count > $self->{_expire_at}) {
    dbg("rbl reporter: _db_expiry_check: $count domains in DB, doing expiry");
    return 1;
  } else {
    dbg("rbl reporter: _db_expiry_check: $count domains in DB, skipping 
expiry");
    return 0;
  }
}


# do database expiry
sub _do_db_expiry {
  my ($self) = @_;
  my $time = time;

  dbg("rbl reporter: _do_db_expiry: doing DB expiry");

  my $limit = $self->{_expire_to} + 10000; # limit to an expiry of 10,000 
domains to prevent spamc timeout
  my $sql = "SELECT id FROM rbl_reporting 
           ORDER BY ( (spam_count+ham_count) / (GREATEST($time-mtime,1)) ) 
         DESC LIMIT $self->{_expire_to}, $limit";

  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: _do_db_expiry: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my $rc = $sth->execute();

  unless (defined($rc)) {
    dbg("rbl reporter: _do_db_expiry: SQL Error: ".$self->{_dbh}-errstr());
    return 0;
  }

  # incase another child has already done the expiry
  if ($rc == 0) {
    dbg("rbl reporter: _do_db_expiry: no domains to expire, aborting expiry");
    return 1;
  }

  my $ids_arrayref = $sth->fetchall_arrayref();

  unless (defined($ids_arrayref)) {
    dbg("rbl reporter: _do_db_expiry: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }
  $sth->finish();

  # build sql WHERE string for DELETE
  my $ids = '';
  foreach (@$ids_arrayref) {
    $ids .= "[EMAIL PROTECTED] OR ";
  }
  chop $ids; chop $ids; chop $ids; chop $ids;

  dbg("rbl reporter: _do_db_expiry: expiring $rc domains with IDs: '$ids'");

  $sql = "DELETE FROM rbl_reporting WHERE $ids";
  $sth = $self->{_dbh}->prepare($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: _do_db_expiry: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  $rc = $sth->execute();
  $sth->finish();

  unless (defined($rc)) {
    dbg("rbl reporter: _do_db_expiry: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  dbg("rbl reporter: _do_db_expiry: $rc domains expired");

  $sql = "UPDATE rbl_reporting_vars SET value = value - $rc 
           WHERE variable = 'number_domains'";

  my $rows = $self->{_dbh}->do($sql);
  dbg("rbl reporter: _do_db_expiry: failed to decrement number_domains") unless 
$rows;

  $sql = "UPDATE rbl_reporting_vars SET value = value + $rc WHERE variable = 
'number_domains_expired'";
  $rows = $self->{_dbh}->do($sql);
  dbg("rbl reporter: _do_db_expiry: failed to increment 
number_domains_expired") unless $rows;

  $sql = "UPDATE rbl_reporting_vars SET value = $time WHERE variable = 
'time_last_expiry'";
  $rows = $self->{_dbh}->do($sql);
  dbg("rbl reporter: _do_db_expiry: failed to set time_last_expiry") unless 
$rows;

  dbg("rbl reporter: _do_db_expiry: expiry variable update complete");

  return 1;
}


# check to see if it is time to send a report
sub _report_check {
  my ($self) = @_;

  return 0 unless (defined($self->{_dbh}));

  my $sql = "SELECT value FROM rbl_reporting_vars 
              WHERE variable = 'time_last_report'";

  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: _report_check: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my $rc = $sth->execute();

  unless (defined($rc)) {
    dbg("rbl reporter: _report_check: SQL Error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my ($time_last_report) = $sth->fetchrow_array();
  $sth->finish();

  unless (defined($time_last_report)) {
    dbg("rbl reporter: _report_check: time_last_report not defined!");
    return 0;
  }

  # 'lock' the database for reporting
  # we don't actually lock it, we just indicate that we're doing a report so
  # that concurrent processes don't try and also do a report
  
  # here's the 'lock' method...
  # 1. check to see if the last report time is negative... that signifies
  #    that another report is in process
  # 2. if it's positive, we try to make it negative... we use the WHERE clause
  #    with both the variable and value in it to make sure another concurrent
  #    process hasn't beet us to it... we flip the sign of the current time
  #    value so that we can turn it back if the report later fails

  if ($time_last_report > 0 && $self->{_report_interval} < time - 
$time_last_report) {
    dbg("rbl reporter: _report_check: time to send a report, doing report");

    $sql = "UPDATE rbl_reporting_vars
               SET value = value * -1
             WHERE variable = 'time_last_report' AND value = $time_last_report";

    $rc = $self->{_dbh}->do($sql);

    if (!defined($rc) || $rc eq '0E0' || $rc == 0) {
      dbg("rbl reporter: _report_check: another process got the expiry lock 
first, skipping report");
      return 0;
    }
    return 1;
  } else {
    dbg("rbl reporter: _report_check: too early to send a report, skipping 
report");
    return 0;
  }
}


# reverse the expiry lock
sub _report_reverse_lock {
  my ($self) = @_;

  my $sql = "UPDATE rbl_reporting_vars
               SET value = value * -1
             WHERE variable = 'time_last_report'";

  my $rc =  $self->{_dbh}->do($sql);
  if (defined($rc) && $rc == 1) {
    dbg("rbl reporter: _report_reverse_lock: report lock successfully 
reversed");
    return 1;
  } else {
    dbg("rbl reporter: _report_reverse_lock: report lock reversal failed: 
".$self->{_dbh}->errstr());
    return 0;
  }
}


# send statistics report via email
sub _report {
  my ($self) = @_;

  dbg("rbl reporter: sending rbl statistics report");
  unless (HAS_NET_DNS) {
    dbg("rbl reporter: _report: required module, Net::DNS, not available, 
aborting");
    $self->_report_reverse_lock();
    return 0;
  }
  unless (HAS_NET_SMTP) {
    dbg("rbl reporter: _report: required module, Net::SMTP, not available, 
aborting");
    $self->_report_reverse_lock();
    return 0;
  }

  unless (defined($self->{_dbh})) {
    dbg("rbl reporter: _report: database handle undefined, aborting");
    $self->_report_reverse_lock();
    return 0;
  }

  # message variables
  my $subject = "rbl statistics report via " . Mail::SpamAssassin::Version();
  my $user = $self->{main}->{'username'} || 'unknown';
  my $host = Mail::SpamAssassin::Util::fq_hostname() || 'unknown';
  my $from = $self->{_from_address} || "[EMAIL PROTECTED]";

  my $time = time;

  # message data
  my %head = (
              'To' => $self->{_to_address},
              'From' => $from,
              'Subject' => $subject,
              'Date' => Mail::SpamAssassin::Util::time_to_rfc822_date(),
              'Message-Id' =>
                sprintf("<[EMAIL PROTECTED]>",$time,int(rand(2**32)),$host),
              );

  my $sql = "SELECT id, domain, spam_count, ham_count, rtime 
              FROM rbl_reporting
          ORDER BY ( (spam_count+ham_count) / (GREATEST($time-mtime,1)) ) 
        DESC LIMIT $self->{_report_number}";

  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("rbl reporter: _report: SQL Error: ".$self->{_dbh}->errstr());
    $self->_report_reverse_lock();
    return 0;
  }

  my $rc = $sth->execute();

  unless (defined($rc)) {
    dbg("rbl reporter: _report: SQL Error: ".$self->{_dbh}-errstr());
    $self->_report_reverse_lock();
    return 0;
  }

  # incase another child has already done the expiry
  if ($rc == 0) {
    dbg("rbl reporter: _report: no domains to report, aborting report");
    return 1;
  }

  my $report_data_arrayref = $sth->fetchall_arrayref();

  unless (defined($report_data_arrayref)) {
    dbg("rbl reporter: _report: SQL Error: ".$self->{_dbh}->errstr());
    $self->_report_reverse_lock();
    return 0;
  }
  $sth->finish();

  # generate report body
  my $body = "report time: $time\ndomains: 
$rc\n\nspam_count\tham_count\ttime_span\tdomain\n\n";
  foreach (@$report_data_arrayref) {
    $body .= "@[EMAIL PROTECTED]" . ($time - @$_[4]) . "[EMAIL PROTECTED]";
  }

  # compose message
  my $message;
  while (my ($k, $v) = each %head) {
    $message .= "$k: $v\n";
  }
  $message .= "\n" . $body;

  # send message
  my $failure;
  my $mx = $head{To};
  my $hello = Mail::SpamAssassin::Util::fq_hostname() || $from;
  $mx =~ s/.*\@//;
  $hello =~ s/.*\@//;
  for my $rr (Net::DNS::mx($mx)) {
    my $exchange = Mail::SpamAssassin::Util::untaint_hostname($rr->exchange);
    next unless $exchange;
    my $smtp;
    if ($smtp = Net::SMTP->new($exchange,
                               Hello => $hello,
                               Port => 25,
                               Timeout => 100))
    {   # smtp_dbg doesn't work for me (causing things to die, leaving the DB 
lock in place)
      if ($smtp->mail($head{From}) && #smtp_dbg("FROM $head{From}", $smtp) &&
          $smtp->recipient($head{To}) && #smtp_dbg("TO $head{To}", $smtp) &&
          $smtp->data($message) && #smtp_dbg("DATA", $smtp) &&
          $smtp->quit() )# && smtp_dbg("QUIT", $smtp))
      {
        dbg("rbl reporter: _report: report to $exchange succeeded");
        $failure = undef;
        last;
      }
      my $code = $smtp->code();
      my $text = $smtp->message();
      $failure = "$code $text" if ($code && $text);
    }
    $failure ||= "Net::SMTP error";
    chomp $failure;
    dbg("rbl reporter: _report: report to $exchange failed: $failure");
  }
  if ($failure) {
    $self->_report_reverse_lock();
    return 0;
  }

  # now update database to reflect a successful report
  $sql = "UPDATE rbl_reporting_vars 
             SET value = $time 
           WHERE variable = 'time_last_report'";

  my $rows = $self->{_dbh}->do($sql);
  dbg("rbl reporter: _report: failed to update time_last_report: 
".$self->{_dbh}->errstr()) unless $rows;

  $sql = "UPDATE rbl_reporting_vars
             SET value = value + 1
           WHERE variable = 'number_reports_sent'";

  $rows = $self->{_dbh}->do($sql);
  dbg("rbl reporter: _report: failed to increment number_reports_sent: 
".$self->{_dbh}->errstr()) unless $rows;

  # lots of updates (we update one at a time instead of doing them all at once
  # for accuracy... some domains could be updated while we're doing the report)
  foreach (@$report_data_arrayref) {

    $sql = "UPDATE rbl_reporting SET spam_count = spam_count - @$_[2],
                            spam_count_reported = spam_count_reported + @$_[2],
                                      ham_count = ham_count - @$_[3],
                             ham_count_reported = ham_count_reported + @$_[3],
                                          rtime = $time
             WHERE id = @$_[0]";

    $sth = $self->{_dbh}->prepare_cached($sql);

    unless (defined($sth)) {
      dbg("rbl reporter: _report: SQL Error: ".$self->{_dbh}->errstr());
      next;
    }

    my $rc = $sth->execute();

    unless (defined($rc)) {
      dbg("rbl reporter: _report: SQL Error: ".$self->{_dbh}-errstr());
      next;
    }

    # incase another child has already done the expiry
    if ($rc != 1) {
      dbg("rbl reporter: _report: UPDATE row count should be 1 not $rc, 
weird... continuing anyway");
      next;
    }
  }
  return 1;
}


# connect to sql database
sub _connect_db {
  my ($self, $scanner) = @_;
  my $conf = $scanner->{permsgstatus}->{conf};

  return 0 unless (HAS_DBI);
  return 1 if ($self->{_dbh}); # already connected

  my $dbh = DBI->connect($self->{_dsn}, $self->{_dbuser}, $self->{_dbpass},
                        {'PrintError' => 0, 'AutoCommit' => 1});

  if (!$dbh) {
    dbg("rbl reporter: Unable to connect to database: ".DBI->errstr());
    return 0;
  }
  else {
    dbg("rbl reporter: Database connection established");
  }

  $self->{_dbh} = $dbh;
  return 1;
}


# disconnect from database
sub _disconnect_db {
  my ($self) = @_;

  return unless (defined($self->{_dbh}));

  $self->{_dbh}->disconnect();
  $self->{_dbh} = undef;
}
Loadplugin Mail::SpamAssassin::Plugin::Reporting 
/etc/mail/spamassassin/Reporting.pm

rbl_statistics_report           1       
rbl_statistics_from_address     [EMAIL PROTECTED]
rbl_statistics_dsn              DBI:mysql:spamassassin:localhost
rbl_statistics_username         spamassassin
rbl_statistics_password         password

CREATE TABLE `rbl_reporting` (
  `id` int(11) NOT NULL auto_increment,
  `domain` varchar(255) NOT NULL default '',
  `spam_count` int(11) NOT NULL default '0',
  `ham_count` int(11) NOT NULL default '0',
  `spam_count_reported` int(11) NOT NULL default '0',
  `ham_count_reported` int(11) NOT NULL default '0',
  `ctime` int(11) NOT NULL default '0',
  `mtime` int(11) NOT NULL default '0',
  `rtime` int(11) NOT NULL default '0',
  PRIMARY KEY  (`id`,`domain`),
  KEY `domain` (`domain`)
) TYPE=MyISAM;

CREATE TABLE `rbl_reporting_vars` (
  `variable` varchar(30) NOT NULL default '',
  `value` int(11) NOT NULL default '0',
  PRIMARY KEY  (`variable`)
) TYPE=MyISAM;

Reply via email to