Hello there,
Since I read in the TODO of the OCR plugin that the words are still
hardcoded, and since I wanted to contribute something too, I modified the
plugin so that the words can be configured in the .cf file.
I have attached both the new pm file and the cf file.
If you like it, please put it in the SpamAssassin wiki so that others can
profit from it, too.
Have fun, and to the author, great plugin and great idea :)
Cheers
Chris
# SpamAssassin configuration for the Ocr plugin.
loadplugin Ocr Ocr.pm
body OCR eval:check_ocr()
describe OCR Check if text in attached images contains spam words
score OCR 4.5
# Words to scan for in the OCR output, one "ocr_word" line per word.
# List each word only once: the plugin counts one hit per listed entry, so a
# repeated entry lets a single occurrence satisfy its "more than one hit" test.
ocr_word stock
ocr_word international
ocr_word company
ocr_word money
ocr_word million
ocr_word thousand
ocr_word buy
ocr_word price
ocr_word trade
# Ocr plugin, version 2
package Ocr;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::Plugin;
# Inherit from the SpamAssassin plugin base class.
our @ISA = ('Mail::SpamAssassin::Plugin');

# Search patterns collected from "ocr_word" configuration lines; filled in by
# parse_config() and read by check_ocr().
our @words = ();
# constructor: register the eval rule
# Constructor: build the plugin object through the parent class and register
# the "check_ocr" eval rule on it.
#
# Parameters:
#   $class  - class name, or an existing object whose class is reused
#   $mailsa - passed through unchanged to the parent constructor
#
# Returns the new plugin object.
sub new {
    my $class  = shift;
    my $mailsa = shift;

    # Allow $obj->new(...) as well as Class->new(...).
    $class = ref $class if ref $class;

    my $self = $class->SUPER::new($mailsa);
    bless $self, $class;

    $self->register_eval_rule('check_ocr');

    return $self;
}
# Configuration hook: collect the search patterns given on "ocr_word" lines.
#
# Parameters:
#   $self - the plugin object
#   $opts - hash ref describing one configuration line; only its {key} and
#           {value} entries are used here
#
# Returns 1 when the line is an "ocr_word" setting with a usable value (the
# value is stored in @words unless it is already there), and 0 for every
# other line so that it is not reported as handled by this plugin.
sub parse_config {
    my ( $self, $opts ) = @_;

    return 0 unless defined $opts->{key} && $opts->{key} eq 'ocr_word';

    my $word = $opts->{value};

    # Reject empty values: check_ocr() uses each entry as a match pattern,
    # and Perl treats an empty pattern as "reuse the last successful
    # pattern" rather than as something that matches nothing.
    return 0 unless defined $word && length $word;

    # @words is only ever appended to, so an entry that is configured twice
    # (or a configuration that is parsed more than once) would otherwise
    # make a single occurrence of the word count as several hits.
    push @words, $word unless grep { $_ eq $word } @words;

    # This setting belongs to this plugin; no other plugin needs to see it.
    $self->inhibit_further_callbacks();

    return 1;
}
# Eval rule "check_ocr": run OCR on every image/gif part of the message and
# count how often the configured words show up in the recognised text.
#
# Parameters:
#   $self - the plugin object (unused)
#   $pms  - per-message object; $pms->{msg}->find_parts("image") supplies the
#           candidate parts
#
# Every entry of @words is used as a case-insensitive match pattern against
# every line of OCR output, and each (line, pattern) match counts as one hit.
#
# Returns 1 when more than one hit was counted, 0 otherwise.
sub check_ocr {
    my ( $self, $pms ) = @_;

    # Select the GIF parts first so that no further work is done for the
    # common case of a message without any.
    my @gif_parts;
    for my $part ( $pms->{msg}->find_parts("image") ) {
        my ($ctype) = Mail::SpamAssassin::Util::parse_content_type(
            $part->get_header('content-type') );
        push @gif_parts, $part if defined $ctype && $ctype eq 'image/gif';
    }
    return 0 unless @gif_parts;

    # Compile each configured pattern once instead of once per output line.
    # Empty entries are skipped (an empty pattern would make Perl reuse the
    # last successful pattern) and a broken pattern is reported and skipped
    # instead of aborting the whole rule.
    my @patterns;
    for my $word (@words) {
        next unless defined $word && length $word;
        my $re = eval { qr/$word/i };
        if ( defined $re ) {
            push @patterns, $re;
        }
        else {
            warn "ocr: ignoring invalid ocr_word pattern '$word': $@";
        }
    }
    return 0 unless @patterns;

    my $cnt = 0;
    for my $part (@gif_parts) {
        my $image_data = join '', $part->decode();
        next unless length $image_data;

        for my $line ( _ocr_gif_lines($image_data) ) {
            $cnt += grep { $line =~ $_ } @patterns;
        }
    }

    return $cnt > 1 ? 1 : 0;
}

# Private helper: feed one GIF image through /usr/bin/convert and
# /usr/bin/gocr and return the lines of text that gocr wrote.  Returns an
# empty list when the temporary file or the pipeline cannot be set up.
sub _ocr_gif_lines {
    my ($image_data) = @_;

    require File::Temp;

    # The OCR output goes to a temporary file.  File::Temp picks an
    # unpredictable name and creates the file itself, so the path cannot be
    # prepared in advance by another local user (unlike a fixed name built
    # from the process id).  The file is removed when $tmp goes out of scope.
    my $tmp = eval {
        File::Temp->new(
            TEMPLATE => 'spamassassin.ocr.XXXXXXXX',
            TMPDIR   => 1,
        );
    };
    if ( !defined $tmp ) {
        warn "ocr: cannot create temporary file: $@";
        return;
    }

    # The path reaches the shell through the environment and is expanded
    # inside double quotes there, so no character in it is ever parsed as
    # shell syntax.
    local $ENV{OCR_OUTPUT_FILE} = $tmp->filename;

    # If one of the programs is missing or exits early, writing the image
    # must fail with an error rather than kill this process with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    my $pipeline =
      '/usr/bin/convert - pnm:- | /usr/bin/gocr -i - > "$OCR_OUTPUT_FILE"';

    my $ocr_in;
    if ( !open( $ocr_in, '|-', $pipeline ) ) {
        warn "ocr: cannot start OCR pipeline: $!\n";
        return;
    }
    binmode $ocr_in;    # image data is binary
    print {$ocr_in} $image_data;

    # close() waits for the pipeline to finish.  A non-zero exit status
    # (which leaves $! at 0) is not treated as an error, since whatever
    # text was produced is still worth scanning; only real I/O errors are
    # reported.
    if ( !close($ocr_in) && $! ) {
        warn "ocr: error closing OCR pipeline: $!\n";
    }

    my $ocr_out;
    if ( !open( $ocr_out, '<', $tmp->filename ) ) {
        warn "ocr: cannot read OCR output: $!\n";
        return;
    }
    my @lines = <$ocr_out>;
    close $ocr_out;

    return @lines;
}
1;