Hello Mark, i did some research on optimizing Amavis/Spamassassin in our test setup with NYTProf. I found that replacing the call to the external file(1) tool with calls to File::LibMagic increased the throughput from 4.1 to 4.8 msg/s. It also simplifies the code in determine_file_types().
Markus
--- amavisd.patched-ctch 2014-07-10 15:01:01.162732554 +0200
+++ amavisd.libmagic 2014-07-10 15:51:04.054289054 +0200
@@ -27318,7 +27318,7 @@
BEGIN {
require Exporter;
- use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
+ use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $magic);
$VERSION = '2.316';
@ISA = qw(Exporter);
@EXPORT_OK = qw(&init &decompose_part &determine_file_types);
@@ -27331,6 +27331,9 @@
import Amavis::Lookup qw(lookup lookup2);
import Amavis::Unpackers::MIME qw(mime_decode);
import Amavis::Unpackers::NewFilename qw(consumed_bytes);
+
+ use File::LibMagic;
+ $magic = File::LibMagic->new;
}
use subs @EXPORT_OK;
@@ -27431,52 +27434,15 @@
# call 'file(1)' utility for each part,
# and associate (save) full and short file content types with each part
#
+
sub determine_file_types($$) {
my($tempdir, $partslist_ref) = @_;
- defined $file && $file ne ''
- or die "Unix utility file(1) not available, but is needed";
my(@all_part_list) = grep($_->exists, @$partslist_ref);
my $initial_num_parts = scalar(@all_part_list);
- my $cwd = "$tempdir/parts";
- if (@all_part_list) { chdir($cwd) or die "Can't chdir to $cwd: $!" }
- my($proc_fh,$pid); my $eval_stat;
- eval {
- while (@all_part_list) {
- my(@part_list,@file_list); # collect reasonably small subset of filenames
- my $arglist_size = length($file); # size of a command name itself
- while (@all_part_list) { # collect as many args as safe, at least one
- my $nm = $all_part_list[0]->full_name;
- local($1); $nm =~ s{^\Q$cwd\E/(.*)\z}{$1}s; # remove cwd from filename
- # POSIX requires 4 kB as a minimum buffer size for program arguments
- last if @file_list && $arglist_size + length($nm) + 1 > 4000;
- push(@part_list, shift(@all_part_list)); # swallow the next one
- push(@file_list, $nm); $arglist_size += length($nm) + 1;
- }
- if (scalar(@file_list) < $initial_num_parts) {
- do_log(2, "running file(1) on %d (out of %d) files, arglist size %d",
- scalar(@file_list), $initial_num_parts, $arglist_size);
- } else {
- do_log(5, "running file(1) on %d files, arglist size %d",
- scalar(@file_list), $arglist_size);
- }
- ($proc_fh,$pid) = run_command(undef, '&1', $file, @file_list);
- my $index = 0; my $ln;
- for ($! = 0; defined($ln=$proc_fh->getline); $! = 0) {
- do_log(5, "result line from file(1): %s", $ln);
- chomp($ln); local($1,$2);
- if ($index > $#file_list) {
- do_log(-1,"NOTICE: Skipping unexpected output from file(1): %s",$ln);
- } else {
- my $part = $part_list[$index]; # walk through @part_list in sync
- my $expect = $file_list[$index]; # walk through @file_list in sync
- if ($ln !~ /^(\Q$expect\E):[ \t]*(.*)\z/s) {
- # split file name from type
- do_log(-1,"NOTICE: Skipping bad output from file(1) ".
- "at [%d, %s], got: %s", $index,$expect,$ln);
- } else {
- my $type_short; my $actual_name = $1; my $type_long = $2;
- $type_short =
- lookup2(0,$type_long,\@map_full_type_to_short_type_maps);
+
+ foreach my $part ( @all_part_list ) {
+ my $type_long = $magic->describe_filename($part->full_name);
+ my $type_short = lookup2(0,$type_long,\@map_full_type_to_short_type_maps);
ll(4) && do_log(4, "File-type of %s: %s%s",
$part->base_name, $type_long,
(!defined $type_short ? ''
@@ -27487,40 +27453,8 @@
$part->attributes_add('C') # simpleminded
if !ref($type_short) ? $type_short eq 'pgp' # encrypted?
: grep($_ eq 'pgp', @$type_short);
- $index++;
- }
- }
- }
- defined $ln || $! == 0 || $! == EAGAIN
- or die "Error reading from file(1) utility: $!";
- do_log(-1,"unexpected(file): %s",$!) if !defined($ln) && $! == EAGAIN;
- my $err = 0; $proc_fh->close or $err = $!;
- my $child_stat = defined $pid && waitpid($pid,0) > 0 ? $? : undef;
- undef $proc_fh; undef $pid; my(@errmsg);
- # exit status is 1 when result is 'ERROR: ...', accept it mercifully
- proc_status_ok($child_stat,$err, 0,1)
- or push(@errmsg, "failed, ".exit_status_str($child_stat,$err));
- if ($index < @part_list) {
- push(@errmsg, sprintf("parsing failure - missing last %d results",
- @part_list - $index));
- }
- !@errmsg or die join(", ",@errmsg);
- # even though exit status 1 is accepted, log a warning nevertheless
- proc_status_ok($child_stat,$err)
- or do_log(-1, "file utility failed: %s",
- exit_status_str($child_stat,$err));
- }
- 1;
- } or do {
- $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
- kill_proc($pid,$file,1,$proc_fh,$eval_stat) if defined $pid;
- };
- chdir($tempdir) or die "Can't chdir to $tempdir: $!";
- section_time(sprintf('get-file-type%d', $initial_num_parts));
- if (defined $eval_stat) {
- do_log(-2, "file(1) utility (%s) FAILED: %s", $file,$eval_stat);
- # die "file(1) utility ($file) error: $eval_stat";
}
+ section_time(sprintf('get-file-type%d', $initial_num_parts));
}
sub decompose_mail($$) {
signature.asc
Description: Digital signature
