Chris Ryland wrote:
I'm thinking of switching from a manual retraining scheme (forwarding junk folder messages under Mac OS X using an AppleScript to pick out the DSPAM signature and forward a message to spam@<myhost>.com) to a server-based auto-retraining scheme that would take all mail in all users' Junk folders (again, these are mostly Mail.app and Entourage users logging into my Mac OS X Server 10.4 system with cyrus as the IMAP server).

I can see how to do the message forwarding on the server (once the message is old enough for the user to have reviewed his spam folder) but don't know enough about cyrus to know how to delete a message from its database once it's been forwarded to the spam server.

Does anyone have experience with this kind of setup?

Thanks &
Cheers!
--Chris Ryland / Em Software, Inc. / www.emsoftware.com



Hello, this might help with what you want to do. I adapted it from a Columbia University script (search for cal-dspam). Just create a SPAM and a HAM IMAP folder for each user (the script looks for those exact names) and run the attached script as a server-side job. I modified the script a bit to work for virtual-domain users (usernames like [EMAIL PROTECTED]). You might be able to improve on it... ^_^

--
Peter Santiago         [EMAIL PROTECTED]
My website:            www.psinergybbs.com
My spamtrap address:   [EMAIL PROTECTED]

#!/usr/bin/perl -w 
 
use Mail::IMAPClient; 
#use IPC::Open3; 
use IO::Socket::UNIX; 
use IO::Socket; 
use Socket; 
use Data::Dumper; 
 
## cal-dspam-process: pull messages from a cyrus imap server and pass 
## them off to dspam for retraining. 
##
## Usage: cal-dspam-process (report|retrain|sweep) [--verbose]
##
##   The first argument selects the action and is mandatory; anything other
##   than report, retrain or sweep aborts the run.  An optional second
##   argument of exactly --verbose turns on debug output on STDERR.
 
#  suggested invocation (on sedna, at any rate): 
## k5start -U -f ~/dspam.keytab -S imap -I sedna.astro.columbia.edu cal-dspam-process retrain
 
## Default to the empty string so that a missing argument fails the
## validation below instead of triggering an "uninitialized value" warning.
my $action = defined($ARGV[0]) ? $ARGV[0] : '';
$action =~ m/^(report|retrain|sweep)$/
  or die "Must choose a legitimate action (report, retrain, or sweep)";
 
## Debug mode is on only when the second argument is literally --verbose.
my $debug = (defined($ARGV[1]) && ('--verbose' eq $ARGV[1])); 
 
## read, write, error file handles: 
## (declared for the disabled imtest/open3 transport further down; nothing
## in the active code path assigns them)
my ($wfh,$rfh,$efh); 
 
## IMAP host to log in to, and the path of the cyrus unix socket that the
## disabled transport would have used.
my ($server,$socket) = ('localhost', '/var/lib/imap/socket/imap'); 
 
## ---- disabled legacy transport: talk to cyrus through imtest ----------
#my $imt = 'imtest -x '.$socket.' '.$server; 
 
#my $pid = open3($wfh,$rfh,$efh,$imt); 
 
#warn "$imt opened process $pid\n" if $debug; 
 
#my $line = ''; 
 
#until ($line =~ /^Security strength factor:/i ) { 
#        defined($line = <$rfh>) or die "EOF\n"; 
#        print STDERR "Prolog: $line" if $debug; 
#} 
 
## why should we need to sleep?  in case the socket is not set up yet. 
## FIXME: better than a default sleep would be a poll on the socket as it exists. 
#sleep 1; 
#my $sock = IO::Socket::UNIX->new("$socket") 
#  or die "No socket: $!\n"; 
 
#print STDERR "<<<END OF PROLOG>>>\n" if $debug; 
## ---- end of disabled legacy transport ---------------------------------

## Log in over plain IMAP as the 'dspam' user.
## NOTE(review): the credentials are hard-coded in the script; consider
## moving them to a root-only config file or the environment.
## new() returns undef when the connect or login fails, so stop here with a
## readable message rather than crashing on the first method call below.
my $imap = Mail::IMAPClient->new(
    Server   => $server,
    User     => 'dspam',
    Password => 'novirus',
) or die "Could not connect/login to IMAP server $server: $@\n";

## Strip carriage returns from outgoing data before it is written.
$imap->Prewritemethod(\&Mail::IMAPClient::Strip_cr); 
## Route the client's protocol trace to STDERR when --verbose was given.
$imap->Debug($debug); 
$imap->Debug_fh(\*STDERR); 
## NOTE(review): this forces the client's recorded state to "Connected".  It
## looks like a leftover from the disabled socket transport, where the state
## had to be set by hand -- confirm it is still wanted after a normal login.
$imap->State($imap->Connected); 
#$imap->Socket($socket); 
 
 
 
## folderscan($imap, $foldername, $dspamclass)
##
## Finds every per-user mailbox named $foldername (mailbox paths of the form
## user/<localpart>/<foldername>@<domain>), reads the X-DSPAM-Signature
## header of each message in it and, when the script-level $action is
## 'retrain', runs dspamc to retrain that signature as $dspamclass for the
## owning user.  Messages that were retrained successfully are moved to
## user/dspam/processed/<user with @ replaced by .>/<foldername>.
##
## Parameters:
##   $imap        - Mail::IMAPClient object that is already logged in
##   $foldername  - leaf name of the training folder, e.g. 'SPAM' or 'HAM'
##   $dspamclass  - value passed to dspamc --class=, e.g. 'spam' or 'innocent'
##
## Reads the file-level $action and $debug.  Returns nothing useful.
## Dies if the ACL on a matching folder cannot be set.
sub folderscan { 
  my ($imap, $foldername, $dspamclass) = @_;

  my $fieldspec = "BODY[HEADER.FIELDS (X-DSPAM-Signature)]"; 

  #my @folders = $imap->list("",'user/*/'.$foldername.'@*'); 
  ## The loop below treats each element as a raw LIST response line.
  my @folders = $imap->list();

  foreach my $listline (@folders) { 
    next unless defined $listline;

    ## NOTE(review): the archived copy of this script had the tail of this
    ## pattern replaced by an e-mail scrubber.  It is reconstructed here as
    ## "<foldername>@<domain>", which is what the user-name derivation and the
    ## later s/@/./ on the archive path both rely on -- confirm against a
    ## real LIST response from your server.
    next unless $listline =~
      m{\A\* LIST .* "(user/(.*)/\Q$foldername\E(\@[^"]*))"\s*\z};
    my $folder = $1;
    ## user/<local>/<foldername>@<domain>  ->  <local>@<domain>
    my $user   = $2 . $3;

    warn "Getting $folder"; 
    warn "User: $user";
    my $msgcount = $imap->message_count($folder);

    ## Grant the dspam account rights on the folder before touching it.
    ## Mail::IMAPClient leaves the reason for a failure in $@.
    $imap->setacl($folder,'dspam','write')
      or die "Could not set acl: $@";

    next unless defined($msgcount) && ($msgcount > 0);

    warn "Checking on $folder (for user $user) with $msgcount messages\n"; 
    unless ($imap->select($folder)) {
      warn "Could not select $folder: $@\n";
      next;
    }

    my $hash = $imap->fetch_hash($fieldspec); 
    unless (defined $hash) {
      warn "Could not fetch signatures from $folder: $@\n";
      $hash = {};
    }

    my @dealtwith = (); 
    my @nosigs = (); 
    my @failures = (); 

    foreach my $msgid (sort { $a <=> $b } keys %$hash) { 
      my $sig = $hash->{$msgid}{$fieldspec}; 
      $sig = '' unless defined $sig;
      $sig =~ s/^X-DSPAM-Signature:\s*//i; 
      ## Drop the line ending so a blank header counts as "no signature"
      ## and the value is clean when handed to dspamc as an argument.
      $sig =~ s/\s+\z//;
      warn "$msgid: signature is $sig\n" if $debug; 

      if ($sig eq '') {
        push(@nosigs, $msgid); 
        if ($action eq 'report') { 
          ## otherwise, what should we do?  just feed it as a corpus 
          ## message?  How should we extract the full text? 
          warn "Dealing with message $msgid which does not have a DSPAM signature:\n" if $debug; 
          warn $imap->message_string($msgid) if $debug; 
        } 
        next;
      }

      ## The signature comes from a mail header, i.e. from whoever sent the
      ## message, so it must never reach a shell.  Build the command as a
      ## list; system() then runs dspamc directly with these exact arguments.
      my @dspamcmd = ('dspamc', '--client', '--user', $user,
                      '--class='.$dspamclass, '--source=error',
                      '--signature='.$sig);
      my $dspaminvocation = join(' ', @dspamcmd);

      if ($action eq 'retrain') { 
        my $dspamval = system(@dspamcmd); 
        warn "$dspaminvocation returned $dspamval\n" if ($dspamval); 
        ## and mark it for transfer to the processed messages if 
        ## we were successful in invoking dspam: 
        if ($dspamval == 0) { 
          push(@dealtwith, $msgid); 
        } else { 
          push(@failures, $msgid); 
        } 
      } else { 
        warn "Would invoke \"$dspaminvocation\"\n" if $debug; 
      } 
    } 

    print "$_ \n" foreach @dealtwith;

    # if we've dealt with at least one item: 
    if (scalar @dealtwith) { 
      ## '@' is replaced by '.' in the user component of the archive path.
      (my $userpart = $user) =~ s/\@/\./g;

      ## Create each level of the archive path in turn if it is missing.
      my $newf = '';
      foreach my $part ('user/dspam/processed', $userpart, $foldername) {
        $newf = ($newf eq '') ? $part : $newf.'/'.$part;
        $imap->exists($newf) or $imap->create($newf)
          or warn "Could not create $newf\n"; 
      }

      ## it's either this, or delete each message.  this strategy is less vicious: 
      my $moveval = $imap->move($newf, \@dealtwith); 
      if (defined $moveval) {
        warn "move returned $moveval\n"; 
      } else {
        warn "move to $newf failed: $@\n";
      }
    } 

    warn "TROUBLE: ".(scalar @nosigs)." messages in $folder without a dspam signature\n"
      if (scalar @nosigs); 

    # close() does an implicit expunge 
    $imap->close() or warn "failed to close folder $folder\n"; 

    ## print Data::Dumper->Dumpxs([$hash],['$hash']) if $debug; 
  } 
} 
 
## Process the two training folders in order: messages in HAM are handed to
## folderscan with the dspam class 'innocent', messages in SPAM with 'spam'.
foreach my $job (['HAM', 'innocent'], ['SPAM', 'spam']) {
  my ($training_folder, $class) = @$job;
  folderscan($imap, $training_folder, $class);
}

## Finished with the server.
$imap->logout; 

if ($debug) {
  print STDERR "<<<END>>>\n";
}

exit; 

Reply via email to