#!/usr/bin/perl

#######################################################################
# Copyright Maarten de Boer <mdeboer@iua.upf.es>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#######################################################################
# This perl script allows users to create IMAP folders Learn-SPAM and
# Learn-HAM and move mail there (typically false positives and negatives)
# to train the SpamAssassin Bayes filter. The script looks for all these
# folders, and copies/moves the messages to a local spool, and calls 
# sa-learn. (All actions are optional. See usage)
#######################################################################

use Mail::IMAPClient; 
   
$action = $ARGV[0];

if ($action eq "")
{
	print "Usage:\n\n";
	print "  imap-sa-learn.pl [action]\n\n";
	print "  where [action] is a combination of\n\n";
	print "  h  only do ham\n";
	print "  s  only do spam\n";
	print "  i  show info for each message (subject) ( implies r)\n";
	print "  r  read messages from IMAP\n";
	print "  d  delete messages from IMAP\n";
	print "  w  write messages ( implies r)\n";
	print "  m  move messages ( = rdw )\n";
	print "  c  copy messages ( implies rw)\n";
	print "  l  learn (call sa-learn)\n";
	print "  x  clear copied files\n";
	print "  a  do it all! ( = hsimlx )\n\n";
	exit(0);
}

# some actions depend on or imply other actions
if ($action =~ m/a/) { $action = $action."hsimlx"; }
if ($action =~ m/m/) { $action = $action."rdw"; }
if ($action =~ m/w/) { $action = $action."r"; }
if ($action =~ m/i/) { $action = $action."r"; }
if ($action =~ m/c/) { $action = $action."rw"; }

# if not spam or ham specific, do both
if ( !($action =~ m/s/) && ! ($action =~ m/h/) )
{
	$action = $action."sh";
}

# setup a hash bashed on the action string
# see usage for explication of each key
my %do;
my $a;
foreach $a (split(/,*/,$action))
{	
	$do{$a} = 1;
}

# local temp folder for learning
my $dir = "/var/spool/MailScanner/learning/";

my $k = 0; # counter

if ($do{r} || $do{d})
{
	my $imap = Mail::IMAPClient->new( 
	Server  => "XXXXXXXXX", 
	User    => "XXXXXXXXX",
	Password=> "XXXXXXXXX",
	Uid => 1,
	)
	or die "$@";

	# go thru all folders and look for the Learn-SPAM and Learn-HAM folders
	# of each users
	for my $f ( $imap->folders ) {
		my $type = "";
		my $typef;
		if ($do{s})
		{
			if ($f =~ m/.Learn-SPAM\d?$/) { $type="spam"; $typef = "spam"; }
		}
		if ($do{h})
		{
			if ($f =~ m/.Learn-HAM\d?$/) { $type="ham"; $typef = "ham "; }
		}
		if ($type)
		{
			# adjust acl for these folders so we can actually read and delete
			# the messages
			$imap->setacl($f,"cyrus","lrswipcda") 
			or die "Cannot setacl for $f: $@\n";
			$imap->select($f) 
			or die "Cannot select for $f: $@\n";
			
			# go thru all messages in the current folder
			my @msgs = $imap->search("ALL");
			my $i = 0;
			print "Folder:  $f\n";
			foreach $msg (@msgs)
			{
				my $string;
				if ($do{r})
				{
					$string = $imap->message_string($msg);
					if (!$string) { die "message_string failed for $f:$msg\n"; }
				}
				if ($do{i})
				{
					$string =~ m/(Subject: .*?)[\r|\n]/;
					print "$1\n";
				}
				if ($do{w})
				{

					# remove Spam marked - we should not train on that,
					# but users might move uncorrectly marked mail to
					# the Learn-HAM folder, or correctly marked mail to
					# the Learn-SPAM folder to 
					$string =~ s/Subject: \{Spam\?\} ?/Subject: /;

					# write the message to the local temp spool for learning
					open OUTFILE,">$dir/$type/$$.$k\n" or die "cannot open $dir/$type/$$.$k for writing\n";
					print OUTFILE "$string";
					close OUTFILE;
				}
				$k++;
				$i++;
				if ($do{d})
				{
					$imap->delete_message($msg) or die "failed to delete $msg\n";
				}
			}
			if ($do{r})
			{
				printf "%4d/%4d $typef messages read from folder $f\n",$i,$#msgs+1;
			}
			if ($do{w})
			{
				printf "%4d/%4d $typef messages written to local $type folder\n",$i,$#msgs+1;
			}
			if ($do{d})
			{
				printf "%4d/%4d $typef messages deleted from folder $f\n",$i,$#msgs+1;
				$imap->expunge() or die "failed to expunge\n";
				$imap->close() or die "failed to close\n";
			}
		}
	}
	$imap->logout();
}

# call sa-learn
if ($do{l})
{
	if ($do{h})
	{
	print "Learning HAM\n";
	$err = 
		system "sa-learn","--ham",
		"--prefs-file","/etc/MailScanner/spam.assassin.prefs.conf",
		"$dir/ham";
	if ($err) { die "Learning ham failed\n"; }
	}
	if ($do{s})
	{
	print "Learning SPAM\n";
	$err = 
		system "sa-learn","--spam",
		"--prefs-file","/etc/MailScanner/spam.assassin.prefs.conf",
		"$dir/spam";
	if ($err) { die "Learning spam failed\n"; }
	}
}

# clear the local temp folders
if ($do{x})
{
	if ($do{h})
	{
		print "Clearing local ham folder\n";
		system "rm -f $dir/ham/*";
	}
	if ($do{s})
	{
		print "Clearing local spam folder\n";
		system "rm -f $dir/spam/*";
	}
}
