Bob Menschel asked me to provide this script. It's my
corpus-nightly.pl perl script that can be used in place of Dan's
corpus-nightly shell script. Note: it's probably not that great and I
wrote it a while ago so it may actually be very much inferior to Dan's
script.

I think I originally wrote it because Dan's script had to be run
hourly by cron. My script can be run at any time of day, which
makes it better suited to desktop systems.

-- 
Duncan Findlay
#!/usr/bin/perl

use strict;
use warnings;

use Getopt::Long;

# Command-line switches (all optional, all off by default):
#   --fullcheck  take the corpus list from submit-folder instead of nightly-folders
#   --network    do a network (--net) mass-check run
#   --auto       unattended run: exit with status 2 instead of warning when
#                the start checks fail, and run without progress output
my $fullcheck = 0;
my $net = 0;
my $auto = 0; # From a cron job?
# Default message-age window. The embedded single quotes are shell quoting:
# the string is pasted verbatim into the mass-check command line.
my $after = "--after='-6 months'";
my $corpusfile;

# GetOptions returns false (after printing its own diagnostics) when it meets
# an unknown or malformed option; stop rather than run in an unintended mode.
GetOptions("fullcheck" => \$fullcheck,
	   "network" => \$net,
	   "auto" => \$auto)
  or die "Usage: $0 [--fullcheck] [--network] [--auto]\n";

# File handed to mass-check via -f.
if ($fullcheck) {
  $corpusfile = "/home/duncf/Maildir/Scripts/submit-folder";
} else {
  $corpusfile = "/home/duncf/Maildir/Scripts/nightly-folders";
}

# Checkout that gets updated and in which mass-check is run.
my $tree = "/home/duncf/svn/spamassassin-nightly";

# Used for the uploaded log names and as RSYNC_PASSWORD respectively.
# NOTE(review): the password value looks like a redacted placeholder - confirm.
my $username = "daf";
my $pw = "******";

# Decide whether this is a sensible moment to start.
# A run should begin only when both of these hold:
# 1. It is 9:00 UTC or later
# 2. mass-check has not already been started today

# First condition: the current UTC hour. @time (the gmtime fields for "now")
# is also consulted by the later date and weekday checks.
my @time = gmtime(time);
if ($time[2] < 9) {
  exit 2 if $auto; # Bad time
  # Interactive runs are only cautioned, never stopped.
  warn "Before 9:00 UTC. Start not recommended.";
}

# Second start condition: has a run already been started today (UTC)?
# test.start holds the epoch time at which the previous run began; the run
# that wrote it could still be going or be long dead.
my $startfile = "$tree/masses/test.start";
if (-f $startfile) {
  my $start;
  if (open my $start_fh, '<', $startfile) {
    $start = <$start_fh>;
    close $start_fh;
  } else {
    # Best effort: an unreadable stamp is reported but does not block the run.
    warn "Can't read $startfile: $!";
  }

  # An empty or garbled stamp is treated the same as "not started today".
  if (defined $start && $start =~ /^\s*(\d+)\s*$/) {
    my @starttime = gmtime($1);
    # Same year, month and day of month as now?
    if (($time[5] == $starttime[5]) &&
        ($time[4] == $starttime[4]) &&
        ($time[3] == $starttime[3])) {
      if ($auto) {
        exit 2; # Don't start again
      } else {
        warn "Already started today. Start not recommended.";
      }
    }
  }
}

# Saturday (gmtime weekday 6, UTC) is the day for network runs:
# unattended runs switch to one automatically, interactive runs are
# merely reminded unless --network was already given.
if ($time[6] == 6) {
  $net = 1 if $auto;
  warn "Net run recommended." unless $auto || $net;
}

# Work from the top of the checkout; the file names below are relative to it.
chdir $tree or die "Can't chdir to $tree: $!";

# Net runs use the weekly revision list and a shorter message-age window.
my $revisionfile;
if ($net) {
  $revisionfile = "weekly-versions.txt";
  $after = "--after='-1 month'";
} else {
  $revisionfile = "nightly-versions.txt";
}

# Drop any copy left by an earlier run, then download a fresh one.
# List-form system() runs wget directly, without going through a shell.
unlink $revisionfile;
system("wget", "http://rsync.spamassassin.org/$revisionfile") == 0
  or die "wget of $revisionfile failed (wait status $?)";

open my $revisions_fh, '<', $revisionfile
  or die "Can't open $revisionfile: $!";

# Matching lines consist of a digits-and-dashes field, whitespace, then a
# numeric revision. The last matching line in the file wins.
my $revision;
while (my $line = <$revisions_fh>) {
  if ($line =~ /^[0-9-]+\s+(\d+)$/) {
    $revision = $1;
  }
}

close $revisions_fh;

die "Can't find revision" unless $revision;

# Bring the checkout to the chosen revision: up to 60 attempts, 30 seconds
# apart. Give up loudly if none succeeds, so mass-check is never run against
# a tree that is not at the requested revision.
my $retry = 0;
my $updated = 0;
while ($retry < 60) {
  # $revision is digits only (captured by \d+), passed as its own argument.
  if (system("svn", "update", "-r", $revision) == 0) {
    $updated = 1;
    last;
  }
  $retry++;
  sleep 30 if $retry < 60; # no point waiting after the final attempt
}
die "svn update to revision $revision failed after $retry attempts"
  unless $updated;

# Move into masses/ and clear out leftovers of the previous run. The chdir
# must succeed: the relative unlinks and test.start below would otherwise
# act on whatever directory we happen to be in.
chdir "masses" or die "Can't chdir to masses: $!";
unlink glob("spamassassin/bayes*");
unlink "razor-agent.log";
unlink "test.end";

# Stamp the start time (epoch seconds) of this run into test.start.
open my $start_out, '>', "test.start" or die "Can't open test.start. $!";
print {$start_out} time() . "\n";
close $start_out or die "Can't write test.start. $!";

# Progress output is only wanted when a person is watching.
my $progress = "";
$progress = "--progress --showdots" unless $auto;

# Net runs add "--net -j 8"; everything else on the command line is shared.
# The command is deliberately a single shell string: $after carries its own
# shell quoting.
my $net_flags = $net ? "--net -j 8 " : "";
system ("./mass-check ${net_flags}-f $corpusfile $after $progress") == 0
  or die "Error with mass-check";

# Stamp the finish time (epoch seconds) into test.end. A failure here is
# reported but not fatal, so the finished logs can still be uploaded.
if (open my $end_out, '>', "test.end") {
  print {$end_out} time() . "\n";
  close $end_out or warn "Can't write test.end. $!";
} else {
  warn "Can't open test.end. $!";
}

# The password is handed to rsync through the RSYNC_PASSWORD environment variable.
local $ENV{RSYNC_PASSWORD} = $pw;

# Logs from net runs are uploaded under a "net-" prefixed name.
my $tag = $net ? "net-" : "";

# Upload ham.log and spam.log in that order; a failed transfer only warns.
# NOTE(review): "[EMAIL PROTECTED]" looks like mailing-list address
# obfuscation rather than a real rsync destination - restore the actual
# user\@host before running.
for my $kind ("ham", "spam") {
  system("rsync -CPcvuzb --timeout=120 $kind.log [EMAIL PROTECTED]::corpus/$kind-$tag$username.log") == 0
    or warn "Error with rsync.";
}

Attachment: signature.asc
Description: Digital signature

Reply via email to