#!/usr/local/bin/perl

# Progress message printed before anything else happens
print "Beginning scan of STDIN for spam levels\n";

# Tunables:
#   $SPAMLEVEL - lowest (rounded) score that is counted as spam
#   $MAXMAILS  - top of the graph's y axis (number of mails)
($SPAMLEVEL, $MAXMAILS) = (5, 350);

# The only recognised first argument is -t, which restricts processing to
# today's mail; any other first argument is fatal.
if (defined $ARGV[0]) {
	die "Invalid flag $ARGV[0]\n" unless $ARGV[0] eq "-t";
	print "Using -t flag to only process today's mail\n";
	$ONLYTODAY = 1;
}

# Location of gnuplot program
$GNUPLOT = "/usr/bin/gnuplot";

# Name of the gnuplot command file; opened for writing, so it is
# truncated and rebuilt on every run.
$GNUSCRIPT = "gnuplotscript";
# Three-argument open keeps the mode separate from the file name, and $!
# tells the user why the open failed.
open(GNU, '>', $GNUSCRIPT) || die "Could not open GNUplot output file: $!\n";

# Running per-date spam and ham totals; opened for append so that the
# lines written by earlier runs are kept.
open(ALLSPAM, '>>', 'allspam.txt') || die "Could not open AllSpam count file: $!\n";
open(ALLHAM, '>>', 'allham.txt') || die "Could not open AllHam count file: $!\n";

# Lookup table: lower-case three-letter month abbreviation => month number
# Example lookup: $mon2num{ lc substr($month, 0, 3) }
%mon2num = (
	jan => 1,  feb => 2,  mar => 3,  apr => 4,
	may => 5,  jun => 6,  jul => 7,  aug => 8,
	sep => 9,  oct => 10, nov => 11, dec => 12,
);

# Per-date histogram: $datehash{date}{score} is the number of mails that
# got that (rounded) score on that date
%datehash = ();
# Per-date totals of mails at or above the spam level, and below it
%spam = ();
%ham = ();
# Largest and smallest score seen; both start out at 0
$maxscore = $minscore = 0;
# Comparator for sort(): orders $a and $b by numeric value, ascending.
# NOTE(review): not called anywhere in the visible part of this script.
sub numerically {
	return $a <=> $b;
}

# Build $today as "Mon dd" with the day space-padded to width 2
# (e.g. "May  5"); with -t it is matched against the start of each line.
@months = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");
($sec,$min,$hours,$mday,$mon,$lyear) = localtime();
# A single element is $months[...]; @months[...] would be a one-element slice
$today = sprintf("%s %2d",$months[$mon],$mday);

# Write the gnuplot settings that are shared by every per-day plot
print GNU "set xrange [-20:80]\nset yrange [0:$MAXMAILS]\n";
$spamline = $SPAMLEVEL - 1;	# We need to draw at 4 for spam level 5 for graph to come out right
# The two labels sit one unit either side of the threshold line, so they
# follow $SPAMLEVEL (3 and 5 for the default level of 5).
my $ham_label_x  = $spamline - 1;
my $spam_label_x = $spamline + 1;
print GNU "set label \"<-Ham\" at $ham_label_x,10 right\n";
print GNU "set label \"Spam->\" at $spam_label_x,10 left\n";
print GNU "set xlabel \"Spam Level\"\n";
print GNU "set ylabel \"Number of Emails\"\n";
print GNU "set linestyle 1 linetype -1 linewidth 0\n";
# Vertical line (arrow without a head) marking the ham/spam boundary
print GNU "set arrow from $spamline,0 to $spamline,$MAXMAILS nohead ls 1\n";

# Log lines carry no year, so it is guessed from the clock. The clock is
# read once here instead of once per input line.
my ($now_mon, $now_year) = (localtime())[4,5];	# month is 0-11
$now_year += 1900;

# Read the log from STDIN and tally every spam_scan line by date and by
# rounded score. Updates %datehash, %spam, %ham, $minscore and $maxscore.
while (defined(my $line = <STDIN>)) {
	# chomp removes only a trailing newline; chop would also eat the last
	# real character of a final line that has no newline
	chomp $line;

	next unless $line =~ m/spam_scan/;
	# With -t, keep only lines that start with today's "Mon dd"
	next if $ONLYTODAY && $line !~ m/^\Q$today\E/;

	# Only count lines that carry a score. The match result is tested
	# directly, and the non-greedy capture stops at the first " tests=".
	next unless $line =~ m/hits=(.*?) tests=/ && $1 ne "";
	# Round to the nearest integer
	my $score = int sprintf("%.0f", $1);

	# The first six characters of the line are the month name and the day
	my ($monthname, $day) = split(" ", substr($line, 0, 6));
	# Convert text month to number (undefined for an unknown name)
	my $month = $mon2num{ lc substr($monthname, 0, 3) };

	# A month later than the current one cannot belong to this year, so
	# such a line is taken to be from last year (log spanning New Year)
	my $year = $now_year;
	$year-- if $month && $month > $now_mon + 1;

	# Keep track of min and max scores
	$maxscore = $score if $score > $maxscore;
	$minscore = $score if $score < $minscore;

	# Date key in the form 2003-05-04, zero-padded so it sorts by date
	my $spamkey = sprintf("%04d-%02d-%02d", $year, $month, $day);
	$datehash{$spamkey}{$score}++;

	if ($score >= $SPAMLEVEL) {
		$spam{$spamkey}++;
	} else {
		$ham{$spamkey}++;
	}
}

# For every date seen, in sorted key order: write the score histogram to
# <date>.txt, append the day's totals to the running spam/ham files, and add
# the plot commands for that day to the gnuplot script.
foreach my $spamkey (sort keys %datehash) {
	# A date with only spam (or only ham) has no entry in the other hash;
	# report 0 instead of an empty string
	my $spamcount = $spam{$spamkey} || 0;
	my $hamcount  = $ham{$spamkey}  || 0;

	open(my $out, '>', "$spamkey.txt") || die "Could not open output file $spamkey: $!\n";
	print $out "#$spamkey\n";
	print $out "#Spam=$spamcount\n";
	print $out "#Ham=$hamcount\n";

	# Print the day's totals to the running total files, keyed by date
	# in the format 2003-05-04
	print ALLSPAM "$spamkey, $spamcount\n";
	print ALLHAM "$spamkey, $hamcount\n";

	# Tell GNU to replot each day
	print GNU "set title '$spamkey'\n";
	print GNU "set terminal png color\n";
	print GNU "set output '$spamkey.png'\n";
	print GNU "plot '$spamkey.txt' title 'Mail Count' with steps, '$spamkey.txt' smooth bezier title 'Bezier Approximation', 0 title 'Total Spams = $spamcount' ls 1, 0 title 'Total Hams = $hamcount' ls 1\n";

	# One row per score across the whole observed range, so every day's
	# file covers the same x values; scores not seen that day count as 0
	for my $score ($minscore .. $maxscore) {
		my $total = int($datehash{$spamkey}{$score} || 0);
		print $out "$score, $total\n";
	}

	# Close explicitly so buffered rows reach the disk and write errors
	# are noticed
	close($out) || die "Could not finish writing output file $spamkey: $!\n";
}

# Close the gnuplot script before gnuplot reads it, so that it is complete
# on disk and a write error is detected instead of plotting a partial script.
close(GNU) || die "Could not finish writing GNUplot output file: $!\n";
# The running totals are not read by gnuplot; a failure is only reported.
close(ALLSPAM) || warn "Could not finish writing AllSpam count file: $!\n";
close(ALLHAM) || warn "Could not finish writing AllHam count file: $!\n";

# List-form system() runs the program directly, without a shell. A failure
# is reported on STDERR but does not change this script's exit status.
@args = ($GNUPLOT, $GNUSCRIPT);
system(@args) == 0 or warn "$GNUPLOT $GNUSCRIPT failed (status $?)\n";
