Steffen Mueller wrote:
No worries. Thanks for diving into the guts of PAR::Packer to implement
the feature! My only criticism is that I somehow feel this would be
better put into Module::ScanDeps which does the actual dependency
checking.
Hello,
in my first attempt I tried to cache the whole dependency tree for a given
project. This makes the user responsible to decide wether using the cache is
appropriate or not and can break in various ways.
Now I tried a different approach caching the results of M::SD routines
scan_line/scan_chunk which can be done reliably without requiring any user
decisions.
As a first step I created a patch for M::SD which adds a cache_cb option to
scan_deps/scan_deps_static.
It expects a callback routine which if present will be called before a file is
scanned:
$found in cache = $cache_cb->(action => 'read',
key => $key,
file => $file,
modules => \...@pm,
);
The routine is expected to populate @pm and return true if a valid cache entry
is found in which case M::SD will skip the scan and use the cached result. If
the routine returns false, the file will be scanned by M::SD and the routine
will be called again in order to set/update the cache entry like so:
$cache_cb->(action => 'write',
key => $key,
file => $file,
modules => \...@pm,
);
I attach the M::SD patch against SVN trunk and an example script. Performance
gain is not as extreme as in the first example I posted.
testMSD>perl scan.pl
run no. 1 took : 28 sec
run no. 2 took : 7 sec
ok 1 - result ok
#
run no. 3 took : 7 sec
ok 2 - result ok
#
1..2
Caching code could go to PAR::Packer or to a separate module like
M::SD::DependencyCache.
Thanks for any feedback.
Cheers, Christoph
Btw. there seems to be a bug with {type} of *.bs files sometimes being set to
'data' sometimes being set to 'autoload'.
--- C:\DOKUME~1\chris\LOKALE~1\Temp\ScanDeps.pm-revBASE.svn001.tmp.pm So Jun
28 18:38:19 2009
+++ C:\devel\M-SD_wc\trunk\lib\Module\ScanDeps.pm So Jun 28 18:20:40 2009
@@ -1,5 +1,5 @@
package Module::ScanDeps;
-
+#print "MSD testing\n";
use 5.006;
use strict;
use vars qw( $VERSION @EXPORT @EXPORT_OK @ISA $CurrentPackage @IncludeLibs
$ScanFileRE );
@@ -500,7 +500,7 @@
return $inc_name;
}
-my $Keys = 'files|keys|recurse|rv|skip|first|execute|compile|warn_missing';
+my $Keys =
'files|keys|recurse|rv|skip|first|execute|compile|warn_missing|cache_cb';
sub scan_deps {
my %args = (
rv => {},
@@ -562,8 +562,12 @@
sub scan_deps_static {
my ($args) = @_;
- my ($files, $keys, $recurse, $rv, $skip, $first, $execute, $compile,
$_skip) =
- @$args{qw( files keys recurse rv skip first execute compile _skip )};
+ my ($files, $keys, $recurse, $rv,
+ $skip, $first, $execute, $compile,
+ $cache_cb, $_skip)
+ = @$args{qw( files keys recurse rv
+ skip first execute compile
+ cache_cb _skip )};
$rv ||= {};
$_skip ||= { %{$skip || {}} };
@@ -575,66 +579,62 @@
and $file ne lc($file) and $_skip->{lc($file)}++;
next unless $file =~ $ScanFileRE;
- local *FH;
- open FH, $file or die "Cannot open $file: $!";
+ my @pm;
+ my $found_in_cache;
+ if ($cache_cb){
+ my $pm_aref;
+ # cache_cb populates \...@pm on success
+ $found_in_cache = $cache_cb->(action => 'read',
+ key => $key,
+ file => $file,
+ modules => \...@pm,
+ );
+ unless( $found_in_cache ){
+ @pm = scan_file($file);
+ $cache_cb->(action => 'write',
+ key => $key,
+ file => $file,
+ modules => \...@pm,
+ );
+ }
+ }else{ # no caching callback given
+ @pm = scan_file($file);
+ }
- $SeenTk = 0;
- # Line-by-line scanning
- LINE:
- while (<FH>) {
- chomp(my $line = $_);
- foreach my $pm (scan_line($line)) {
- last LINE if $pm eq '__END__';
+ foreach my $pm (@pm){
+ add_deps(
+ used_by => $key,
+ rv => $args->{rv},
+ modules => [$pm],
+ skip => $args->{skip},
+ warn_missing => $args->{warn_missing},
+ );
- # Skip Tk hits from Term::ReadLine and Tcl::Tk
- my $pathsep = qr/\/|\\|::/;
- if ($pm =~ /^Tk\b/) {
- next if $file =~
/(?:^|${pathsep})Term${pathsep}ReadLine\.pm$/;
- next if $file =~ /(?:^|${pathsep})Tcl${pathsep}Tk\W/;
- }
+ my $preload = _get_preload($pm) or next;
- if ($pm eq '__POD__') {
- while (<FH>) { last if (/^=cut/) }
- next LINE;
- }
-
- $pm = 'CGI/Apache.pm' if $file =~ /^Apache(?:\.pm)$/;
-
- add_deps(
- used_by => $key,
- rv => $args->{rv},
- modules => [$pm],
- skip => $args->{skip},
- warn_missing => $args->{warn_missing},
- );
-
- my $preload = _get_preload($pm) or next;
-
- add_deps(
- used_by => $key,
- rv => $args->{rv},
- modules => $preload,
- skip => $args->{skip},
- warn_missing => $args->{warn_missing},
- );
- }
+ add_deps(
+ used_by => $key,
+ rv => $args->{rv},
+ modules => $preload,
+ skip => $args->{skip},
+ warn_missing => $args->{warn_missing},
+ );
}
- close FH;
-
- # }}}
}
# Top-level recursion handling {{{
+
while ($recurse) {
my $count = keys %$rv;
my @files = sort grep -T $_->{file}, values %$rv;
scan_deps_static({
- files => [ map $_->{file}, @files ],
- keys => [ map $_->{key}, @files ],
- rv => $rv,
- skip => $skip,
- recurse => 0,
- _skip => $_skip,
+ files => [ map $_->{file}, @files ],
+ keys => [ map $_->{key}, @files ],
+ rv => $rv,
+ skip => $skip,
+ recurse => 0,
+ cache_cb => $cache_cb,
+ _skip => $_skip,
}) or ($args->{_deep} and return);
last if $count == keys %$rv;
}
@@ -691,6 +691,40 @@
return ($rv);
}
+sub scan_file{
+ my $file = shift;
+ my %found;
+ my $FH;
+ open $FH, $file or die "Cannot open $file: $!";
+
+ $SeenTk = 0;
+ # Line-by-line scanning
+ LINE:
+ while (<$FH>) {
+ chomp(my $line = $_);
+ foreach my $pm (scan_line($line)) {
+ last LINE if $pm eq '__END__';
+
+ # Skip Tk hits from Term::ReadLine and Tcl::Tk
+ my $pathsep = qr/\/|\\|::/;
+ if ($pm =~ /^Tk\b/) {
+ next if $file =~ /(?:^|${pathsep})Term${pathsep}ReadLine\.pm$/;
+ next if $file =~ /(?:^|${pathsep})Tcl${pathsep}Tk\W/;
+ }
+ if ($pm eq '__POD__') {
+ while (<$FH>) {
+ last if (/^=cut/);
+ }
+ next LINE;
+ }
+ $pm = 'CGI/Apache.pm' if $file =~ /^Apache(?:\.pm)$/;
+ $found{$pm}++;
+ }
+ }
+ close $FH or die "Cannot close $file: $!";
+ return keys %found;
+}
+
sub scan_line {
my $line = shift;
my %found;
use strict;
use warnings;
use Data::Dumper;
use Digest::MD5;
use Carp;
use Module::ScanDeps;
use Test::More q/no_plan/;
my $filename = 'test.pl';
open my $fh, '>', $filename or die "can't open file : $!\n";
print $fh "use Tk;\n";
close $fh or die "can't close file : $!";
my %cache;
my $start = time();
my $hash_ref;
for my $run(1..3){
my $c_hash_ref = scan_deps(
files => [ $filename ],
recurse => 1,
cache_cb => \&cb_func,
);
my $elapsed = time() - $start;
print "run no. $run took : $elapsed sec\n";
normalize( $c_hash_ref );
is_deeply($hash_ref, $c_hash_ref, "result ok\n") if ( $hash_ref );
$hash_ref = $c_hash_ref;
$start = time();
}
sub cb_func{
my %args = @_;
if ( $args{action} eq 'read' ){
return read_cache( %args );
}
elsif ( $args{action} eq 'write' ){
return write_cache( %args );
}
croak "action must be read or write\n";
}
sub read_cache{
my %args = @_;
my ($key, $file, $mod_aref) = @args{qw/key file modules/};
return 0 unless (exists $cache{$key});
### we have an entry - check MD5
my $entry = $cache{$key};
my $checksum = file_2_md5($file);
if ($entry->{checksum} eq $checksum){
@$mod_aref = @{$entry->{modules}};
return 1;
}
return 0;
}
sub write_cache{
my %args = @_;
my ($key, $file, $mod_aref) = @args{qw/key file modules/};
my $entry = $cache{$key} ||= {};
my $checksum = file_2_md5($file);
$entry->{checksum} = $checksum;
$entry->{modules} = [...@$mod_aref];
return 1;
}
sub file_2_md5{
my $file = shift;
open my $fh, '<', $file or die "can't open $file: $!";
my $md5 = Digest::MD5->new;
$md5->addfile($fh);
close $fh or die "can't close $file: $!";
return $md5->hexdigest;
}
# prepare for passing to is_deeply
sub normalize{
my $href = shift;
foreach my $entry(values %$href){
$entry->{$_} = [sort @{$entry->{$_}||=[]}] for (qw/uses used_by/);
#ignore existing M::SD bug with *.bs files and {type} property:
$entry->{type}= 'data' if ($entry->{type} eq 'autoload');
}
}