Steffen Mueller wrote:


No worries. Thanks for diving into the guts of PAR::Packer to implement the feature! My only criticism is that I somehow feel this would be better put into Module::ScanDeps which does the actual dependency checking.

Hello,

in my first attempt I tried to cache the whole dependency tree for a given
project. This makes the user responsible to decide wether using the cache is
appropriate or not and can break in various ways.
Now I tried a different approach caching the results of M::SD routines scan_line/scan_chunk which can be done reliably without requiring any user decisions.
 As a first step I created a patch for M::SD which adds a cache_cb option to
scan_deps/scan_deps_static.
It expects a callback routine which if present will be called before a file is
scanned:
$found in cache = $cache_cb->(action => 'read',
                               key    => $key,
                               file   => $file,
                               modules => \...@pm,
                           );
The routine is expected to populate @pm and return true if a valid cache entry
is found in which case M::SD will skip the scan and use the cached result. If
the routine returns false, the file will be scanned by M::SD and the routine
will be called again in order to set/update the cache entry like so:
   $cache_cb->(action => 'write',
               key    => $key,
               file   => $file,
               modules => \...@pm,
            );

I attach the M::SD patch against SVN trunk and an example script. Performance gain is not as extreme as in the first example I posted.
testMSD>perl scan.pl
run no. 1  took : 28 sec
run no. 2  took : 7 sec
ok 1 - result ok
#
run no. 3  took : 7 sec
ok 2 - result ok
#
1..2

Caching code could go to PAR::Packer or to a separate module like M::SD::DependencyCache.
Thanks for any feedback.

Cheers, Christoph

Btw. there seems to be a bug with {type} of *.bs files sometimes being set to 'data' sometimes being set to 'autoload'.

--- C:\DOKUME~1\chris\LOKALE~1\Temp\ScanDeps.pm-revBASE.svn001.tmp.pm   So Jun 
28 18:38:19 2009
+++ C:\devel\M-SD_wc\trunk\lib\Module\ScanDeps.pm       So Jun 28 18:20:40 2009
@@ -1,5 +1,5 @@
 package Module::ScanDeps;
-
+#print "MSD testing\n";
 use 5.006;
 use strict;
 use vars qw( $VERSION @EXPORT @EXPORT_OK @ISA $CurrentPackage @IncludeLibs 
$ScanFileRE );
@@ -500,7 +500,7 @@
     return $inc_name;
 }
 
-my $Keys = 'files|keys|recurse|rv|skip|first|execute|compile|warn_missing';
+my $Keys = 
'files|keys|recurse|rv|skip|first|execute|compile|warn_missing|cache_cb';
 sub scan_deps {
     my %args = (
         rv => {},
@@ -562,8 +562,12 @@
 
 sub scan_deps_static {
     my ($args) = @_;
-    my ($files, $keys, $recurse, $rv, $skip, $first, $execute, $compile, 
$_skip) =
-        @$args{qw( files keys recurse rv skip first execute compile _skip )};
+    my ($files,  $keys, $recurse, $rv,
+        $skip,  $first, $execute, $compile,
+        $cache_cb, $_skip)
+        = @$args{qw( files keys  recurse rv
+                     skip  first execute compile
+                     cache_cb _skip )};
 
     $rv   ||= {};
     $_skip ||= { %{$skip || {}} };
@@ -575,66 +579,62 @@
           and $file ne lc($file) and $_skip->{lc($file)}++;
         next unless $file =~ $ScanFileRE;
 
-        local *FH;
-        open FH, $file or die "Cannot open $file: $!";
+        my @pm;
+        my $found_in_cache;
+        if ($cache_cb){
+            my $pm_aref;
+            # cache_cb populates \...@pm on success
+            $found_in_cache = $cache_cb->(action => 'read',
+                                          key    => $key,
+                                          file   => $file,
+                                          modules => \...@pm,
+                                      );
+            unless( $found_in_cache ){
+                @pm = scan_file($file);
+                $cache_cb->(action => 'write',
+                            key    => $key,
+                            file   => $file,
+                            modules => \...@pm,
+                        );
+            }
+        }else{ # no caching callback given
+            @pm = scan_file($file);
+        }
 
-        $SeenTk = 0;
-        # Line-by-line scanning
-        LINE:
-        while (<FH>) {
-            chomp(my $line = $_);
-            foreach my $pm (scan_line($line)) {
-                last LINE if $pm eq '__END__';
+        foreach my $pm (@pm){
+            add_deps(
+                     used_by => $key,
+                     rv      => $args->{rv},
+                     modules => [$pm],
+                     skip    => $args->{skip},
+                     warn_missing => $args->{warn_missing},
+                 );
 
-                # Skip Tk hits from Term::ReadLine and Tcl::Tk
-                my $pathsep = qr/\/|\\|::/;
-                if ($pm =~ /^Tk\b/) {
-                  next if $file =~ 
/(?:^|${pathsep})Term${pathsep}ReadLine\.pm$/;
-                  next if $file =~ /(?:^|${pathsep})Tcl${pathsep}Tk\W/;
-                }
+            my $preload = _get_preload($pm) or next;
 
-                if ($pm eq '__POD__') {
-                    while (<FH>) { last if (/^=cut/) }
-                    next LINE;
-                }
-
-                $pm = 'CGI/Apache.pm' if $file =~ /^Apache(?:\.pm)$/;
-
-                add_deps(
-                    used_by => $key,
-                    rv      => $args->{rv},
-                    modules => [$pm],
-                    skip    => $args->{skip},
-                    warn_missing => $args->{warn_missing},
-                );
-
-                my $preload = _get_preload($pm) or next;
-
-                add_deps(
-                    used_by => $key,
-                    rv      => $args->{rv},
-                    modules => $preload,
-                    skip    => $args->{skip},
-                    warn_missing => $args->{warn_missing},
-                );
-            }
+            add_deps(
+                     used_by => $key,
+                     rv      => $args->{rv},
+                     modules => $preload,
+                     skip    => $args->{skip},
+                     warn_missing => $args->{warn_missing},
+                 );
         }
-        close FH;
-
-        # }}}
     }
 
     # Top-level recursion handling {{{
+   
     while ($recurse) {
         my $count = keys %$rv;
         my @files = sort grep -T $_->{file}, values %$rv;
         scan_deps_static({
-            files   => [ map $_->{file}, @files ],
-            keys    => [ map $_->{key},  @files ],
-            rv      => $rv,
-            skip    => $skip,
-            recurse => 0,
-            _skip   => $_skip,
+            files    => [ map $_->{file}, @files ],
+            keys     => [ map $_->{key},  @files ],
+            rv       => $rv,
+            skip     => $skip,
+            recurse  => 0,
+            cache_cb => $cache_cb, 
+            _skip    => $_skip,
         }) or ($args->{_deep} and return);
         last if $count == keys %$rv;
     }
@@ -691,6 +691,40 @@
     return ($rv);
 }
 
+sub scan_file{
+    my $file = shift;
+    my %found;
+    my $FH;
+    open $FH, $file or die "Cannot open $file: $!";
+
+    $SeenTk = 0;
+    # Line-by-line scanning
+  LINE:
+    while (<$FH>) {
+        chomp(my $line = $_);
+        foreach my $pm (scan_line($line)) {
+            last LINE if $pm eq '__END__';
+
+            # Skip Tk hits from Term::ReadLine and Tcl::Tk
+            my $pathsep = qr/\/|\\|::/;
+            if ($pm =~ /^Tk\b/) {
+                next if $file =~ /(?:^|${pathsep})Term${pathsep}ReadLine\.pm$/;
+                next if $file =~ /(?:^|${pathsep})Tcl${pathsep}Tk\W/;
+            }
+            if ($pm eq '__POD__') {
+                while (<$FH>) {
+                    last if (/^=cut/);
+                }
+                next LINE;
+            }
+            $pm = 'CGI/Apache.pm' if $file =~ /^Apache(?:\.pm)$/;
+            $found{$pm}++;
+        }
+    }
+    close $FH or die "Cannot close $file: $!";
+    return keys %found;
+}
+
 sub scan_line {
     my $line = shift;
     my %found;
use strict;
use warnings;
use Data::Dumper;
use Digest::MD5;
use Carp;
use Module::ScanDeps;
use Test::More q/no_plan/;

my $filename = 'test.pl';
open my $fh, '>', $filename or die "can't open file : $!\n";
print $fh "use Tk;\n";
close $fh or die "can't close file : $!";

my %cache;

my $start = time();
my $hash_ref;
for my $run(1..3){
    my $c_hash_ref = scan_deps(
                          files    => [ $filename ],
                          recurse  => 1,
                          cache_cb => \&cb_func,
                      );
    my $elapsed = time() - $start;
    print "run no. $run  took : $elapsed sec\n";
    normalize( $c_hash_ref );
    is_deeply($hash_ref, $c_hash_ref, "result ok\n") if ( $hash_ref );
    $hash_ref = $c_hash_ref;
    $start = time();
}


sub cb_func{
    my %args = @_;
    if ( $args{action} eq 'read' ){
        return read_cache( %args );
    }
    elsif ( $args{action} eq 'write' ){
        return write_cache( %args );
    }
    croak "action must be read or write\n";
}

sub read_cache{
    my %args = @_;
    my ($key, $file, $mod_aref) = @args{qw/key file modules/};
    return 0 unless (exists $cache{$key});
    ### we have an entry - check MD5
    my $entry = $cache{$key};
    my $checksum = file_2_md5($file);
    if ($entry->{checksum} eq $checksum){
        @$mod_aref = @{$entry->{modules}};
        return 1;
    }
    return 0;
}

sub write_cache{
    my %args = @_;
    my ($key, $file, $mod_aref) = @args{qw/key file modules/};
    my $entry = $cache{$key} ||= {};
    my $checksum = file_2_md5($file);
    $entry->{checksum} = $checksum;
    $entry->{modules} = [...@$mod_aref];
    return 1;
}

sub file_2_md5{
    my $file = shift;
    open my $fh, '<', $file or die "can't open $file: $!";
    my $md5 = Digest::MD5->new;
    $md5->addfile($fh);
    close $fh or die "can't close $file: $!";
    return $md5->hexdigest;
}

# prepare for passing to is_deeply
sub normalize{
    my $href = shift;
    foreach my $entry(values %$href){
        $entry->{$_} = [sort @{$entry->{$_}||=[]}] for (qw/uses used_by/);
        #ignore existing M::SD bug with *.bs files and {type} property:
        $entry->{type}= 'data' if ($entry->{type} eq 'autoload');
    }
}

Reply via email to