Bob Menschel asked me to provide this script. It's my corpus-nightly.pl Perl script, which can be used in place of Dan's corpus-nightly shell script. Note: it's probably not that great, and I wrote it a while ago, so it may actually be very much inferior to Dan's script.
I think I originally wrote it because Dan's script had to be run hourly by cron. My script should be runnable at any time of day, which makes it better suited to desktop systems. -- Duncan Findlay
#!/usr/bin/perl
# corpus-nightly.pl -- option parsing and site configuration.
#
# Command-line flags:
#   --fullcheck  use the "submit-folder" corpus list instead of "nightly-folders"
#   --network    request a network (--net) mass-check run
#   --auto       non-interactive mode (e.g. started from a cron job)
use strict;
use warnings;
use Getopt::Long;

my $fullcheck = 0;
my $net       = 0;
my $auto      = 0;    # From a cron job?

# Default mail-age window handed to mass-check. The embedded single quotes are
# meant for the shell that later runs the mass-check command line.
my $after = "--after='-6 months'";

# Stop on unrecognised options instead of silently running with the defaults.
GetOptions(
    "fullcheck" => \$fullcheck,
    "network"   => \$net,
    "auto"      => \$auto,
) or die "Usage: $0 [--fullcheck] [--network] [--auto]\n";

# File passed to mass-check with -f.
my $corpusfile = $fullcheck
    ? "/home/duncf/Maildir/Scripts/submit-folder"
    : "/home/duncf/Maildir/Scripts/nightly-folders";

my $tree     = "/home/duncf/svn/spamassassin-nightly";    # svn checkout that gets updated and run
my $username = "daf";                                     # appears in the uploaded log file names
my $pw       = "******";                                  # exported as RSYNC_PASSWORD for the upload
# Decide whether a run should start now.
#
# A run is expected to start when:
#   1. it is after 9:00 UTC, and
#   2. mass-check has not already been started today (UTC).
# With --auto the script exits with status 2 when either condition fails;
# without it the script only warns and carries on.
my @time = gmtime(time);    # (sec, min, hour, mday, mon, year, wday, ...)

# Run only after 9:00 UTC
if ($time[2] < 9) {
    exit 2 if $auto;    # Bad time
    warn "Before 9:00 UTC. Start not recommended.";
}

my $startfile = "$tree/masses/test.start";
if (-f $startfile) {
    # The stamp could belong to a run still in progress or to one long dead.
    open my $start_fh, '<', $startfile
        or die "Can't open $startfile. $!";
    my $start = <$start_fh>;
    close $start_fh;

    # An empty or corrupt stamp says nothing about today; only compare a
    # genuine epoch value.
    if (defined $start && $start =~ /^(\d+)\s*$/) {
        my @starttime = gmtime($1);
        my $same_day = $time[5] == $starttime[5]     # year
                    && $time[4] == $starttime[4]     # month
                    && $time[3] == $starttime[3];    # day of month
        if ($same_day) {
            exit 2 if $auto;    # Don't start again
            warn "Already started today. Start not recommended.";
        }
    }
}

# Net runs on Saturday
if ($time[6] == 6) {
    if ($auto) {
        $net = 1;
    } elsif (!$net) {
        warn "Net run recommended.";
    }
}
# Work inside the checkout: the download and everything after it use
# relative paths, so a failed chdir must stop the run.
chdir $tree or die "Can't chdir to $tree. $!";

# Net runs follow the weekly revision list and use a shorter mail window.
my $revisionfile;
if ($net) {
    $revisionfile = "weekly-versions.txt";
    $after = "--after='-1 month'";
} else {
    $revisionfile = "nightly-versions.txt";
}

# Drop any stale copy, then download a fresh one into the current directory.
unlink $revisionfile;
system("wget", "http://rsync.spamassassin.org/$revisionfile") == 0
    or die "Can't download $revisionfile (wget status $?)";

open my $revisions_fh, '<', $revisionfile
    or die "Can't open $revisionfile. $!";

# Each matching line is "<digits-and-dashes> <revision>"; the last match wins.
my $revision;
while (my $line = <$revisions_fh>) {
    if ($line =~ /^[0-9-]+\s+(\d+)$/) {
        $revision = $1;
    }
}
close $revisions_fh;
die "Can't find revision" unless $revision;
# Bring the checkout to the requested revision, retrying up to 60 times with
# a 30 second pause between attempts. Give up if every attempt fails:
# continuing would mass-check a tree at some other revision.
my $retry   = 0;
my $updated = 0;
while ($retry < 60) {
    if (system("svn", "update", "-r", $revision) == 0) {
        $updated = 1;
        last;
    }
    $retry++;
    sleep 30 if $retry < 60;    # no point sleeping after the final attempt
}
die "svn update -r $revision failed after $retry attempts" unless $updated;
# Write the current epoch time, followed by a newline, to $file.
# Returns true on success and false (with $! set) on failure.
sub write_timestamp {
    my ($file) = @_;
    open my $fh, '>', $file or return 0;
    print {$fh} time() . "\n" or return 0;
    return close $fh;
}

# The cleanup below uses relative paths, so never continue from the wrong
# directory.
chdir "masses" or die "Can't chdir to masses. $!";

# Clear leftovers from the previous run.
unlink glob("spamassassin/bayes*");
unlink "razor-agent.log";
unlink "test.end";

# Record the start time of this run.
write_timestamp("test.start") or die "Can't open test.start. $!";

# Progress output is only requested when not running with --auto.
my $progress = $auto ? "" : "--progress --showdots";
my $netopts  = $net  ? "--net -j 8 " : "";

# Kept as a single shell string because $after carries shell quoting.
system("./mass-check ${netopts}-f $corpusfile $after $progress") == 0
    or die "Error with mass-check";

# Record the end time. mass-check has already finished, so a failure here is
# only warned about and the script carries on.
write_timestamp("test.end") or warn "Can't write test.end. $!";
# Upload ham.log and spam.log with rsync. The password is handed over through
# the RSYNC_PASSWORD environment variable; net runs get a "net-" infix in the
# remote file name. A failed upload only warns.
local $ENV{RSYNC_PASSWORD} = $pw;
my $tag = $net ? "net-" : "";

# NOTE(review): in the archived copy of this script the destination had been
# redacted to "[EMAIL PROTECTED]", which the shell splits into two bogus
# arguments. It is reconstructed here as <username>@rsync.spamassassin.org
# (the host the revision list is downloaded from) -- confirm before relying
# on it.
my $dest = "$username\@rsync.spamassassin.org";

for my $kind ("ham", "spam") {
    system("rsync", "-CPcvuzb", "--timeout=120", "$kind.log",
           "${dest}::corpus/${kind}-${tag}${username}.log") == 0
        or warn "Error with rsync.";
}
signature.asc
Description: Digital signature
