Hello Joey,

I finally found the time to wrap up the gendelta extension that checks
the local tree for completeness.
After testing it against 10,000 tar balls from the debian archive I am
reasonably confident that it works correctly.

Please take a look.

Best regards

-- 
Muharem Hrnjadovic <[email protected]>
Public key id   : B2BBFCFC
Key fingerprint : A5A3 CC67 2B87 D641 103F  5602 219F 6B60 B2BB FCFC
=== added file 'Ptutils.pm'
--- Ptutils.pm	1970-01-01 00:00:00 +0000
+++ Ptutils.pm	2009-10-14 08:08:35 +0000
@@ -0,0 +1,283 @@
+#!/usr/bin/perl
+=head1 NAME
+
+ptutils - pristine-tar utilities
+
+=head1 DESCRIPTION
+
+Various utility functions used by the pristine-tar tool.
+
+=head1 FUNCTIONS
+
+=over 4
+
+=item checkmanifest
+
+Find all paths that are listed in the gendelta manifest but do *not* exist
+in the local tree.
+
+When the optional "localtree" argument is passed to gendelta it will check
+whether the files required are all present in the local tree and abort if this
+is not the case.
+
+=back
+
+=head1 LIMITATIONS
+
+The gentar gendelta "check local tree" extension was tested extensively
+against the debian tar ball archive but may still occasionally stumble over
+file names (in the tar-generated manifest) that are encoded in ways that
+pristine-tar does not understand, but tar does.
+
+=head1 AUTHOR
+
+Muharem Hrnjadovic <[email protected]>
+
+Licensed under the GPL, version 2 or above.
+
+=cut
+
+package Ptutils;
+
+use Exporter;
+...@isa = ("Exporter");
+...@export = qw(&checkmanifest);
+
+use strict;
+use warnings;
+use Cwd qw(realpath);
+use Data::Dumper;
+use File::Find;
+use File::Spec::Functions qw(abs2rel canonpath catdir splitdir);
+use List::MoreUtils qw(zip);
+
+my $debug=0;
+
+sub debug {
+	message(@_) if $debug;
+}
+
+sub message {
+	print STDERR "pristine-tar utils: @_\n";
+}
+
+sub checkpathsets {
+    # Check whether all files listed in the manifest are available locally for
+    # agiven pair of prefixes.
+    my $dir_paths=shift;    # local paths
+    my $tar_paths=shift;    # path listed in manifest
+    my $prefixes=shift;     # local/manifest path prefixes
+
+    # Strip off the prefixes.
+    normalizepaths($dir_paths, $tar_paths, $prefixes);
+
+    # Now let's see whether all paths in the manifest can be found locally.
+
+    # Strip off trailing slashes.
+    map { s,/*$,,; } @$prefixes;
+    map { s,/*$,,; } @$tar_paths;
+    debug(sprintf "!!! PFX '%s'", join("', '", @$prefixes));
+
+    # Compute path difference.
+    my %seen;
+    @seen {...@$tar_paths} = ();
+    delete @seen {...@$dir_paths};
+    delete @seen {...@$prefixes};
+
+    my @missing_in_tree = sort(keys %seen);
+    return \...@missing_in_tree;
+}
+
+sub checkmanifest {
+    # Find all paths that are listed in the manifest but do *not* exist in
+    # the local tree.
+    my $manifest=shift;
+    my $localtree_path=shift;
+
+    my @manifest_entries = ();
+    my @paths_in_tree = ();
+    my @missing_in_tree;
+
+    (-d $localtree_path) || die "!! No such directory: '$localtree_path', wrong path?";
+
+    find sub { push(@paths_in_tree, canonpath($File::Find::name)) }, abs2rel(realpath($localtree_path));
+
+    open(IN, "<", $manifest) || die "$!";
+    while (<IN>) {
+        chomp $_;
+        # Condense multiple slashes to one.
+        s,//+,/,;
+        # Unicode code points (e.g. \201) are read as text i.e. as four
+        # characters and *not* as a single byte. We convert them to bytes
+        # here.
+        s/\\(\d{3})/"chr(0$1)"/eeg;
+        push(@manifest_entries, $_);
+    }
+    close IN;
+
+    my @lps = @paths_in_tree;
+    my @tps = @manifest_entries;
+
+    # Figure out what the prefixes for the respective path sets might be.
+    my $dir_prefix = compute_representative_path_prefix(\...@lps);
+    my $tar_prefix = compute_representative_path_prefix(\...@tps);
+
+    return if !defined($dir_prefix) || !defined($tar_prefix);
+    my @prefixes;
+    if ($dir_prefix eq $tar_prefix) {
+        @prefixes = ('', '');
+    }
+    else {
+        @prefixes = ($dir_prefix, $tar_prefix);
+    }
+
+    my ($missing, $missing_in_2nd_attempt, $missing_in_3rd_attempt);
+
+    # Check whether we have all the paths required given the prefixes guessed.
+    $missing = checkpathsets(\...@lps, \...@tps, \...@prefixes);
+    debug(sprintf "-- 1st*AT: files missing, %s", scalar(@$missing));
+
+    if ((scalar(@$missing) > 0) && checkprefixes(\...@prefixes)) {
+        # This is in essence guesswork: we did not find all the paths required
+        # based on the prefixes in the first round.
+        # In this 'educated guess' attempt we try to find all that's needed w/o
+        # any prefixes whatsoever.
+        @lps = @paths_in_tree;
+        @tps = @manifest_entries;
+        $missing_in_2nd_attempt = checkpathsets(\...@lps, \...@tps, ['', '']);
+        debug(sprintf "-- 2nd*AT: files missing, %s", scalar(@$missing_in_2nd_attempt));
+
+        # All paths found? Great! Let the caller know.
+        return $missing_in_2nd_attempt if (scalar(@$missing_in_2nd_attempt) < scalar(@$missing));
+    }
+    if (scalar(@$missing) > 0) {
+        # A last ditch attempt: use the prefix found for the files in the tar
+        # manifest also for the files unpacked on the local file system.
+        switch_to_manifest_prefix(\...@prefixes);
+        @lps = @paths_in_tree;
+        @tps = @manifest_entries;
+        $missing_in_3rd_attempt = checkpathsets(\...@lps, \...@tps, \...@prefixes);
+        debug(sprintf "-- 3rd*AT: files missing, %s", scalar(@$missing_in_3rd_attempt));
+
+        # All paths found? Great! Let the caller know.
+        return $missing_in_3rd_attempt if (scalar(@$missing_in_3rd_attempt) < scalar(@$missing));
+    }
+    return $missing;
+}
+
+sub switch_to_manifest_prefix {
+    my $prefixes = shift;
+    my @ldirs = splitdir($$prefixes[0]);
+    my @tdirs = splitdir($$prefixes[1]);
+    $$prefixes[0] = catdir(($ldirs[0], @tdirs[1..$#tdirs]));
+    debug(sprintf "!!! 3rd*AT -> PFX '%s'", join("', '", @$prefixes));
+}
+
+sub checkprefixes {
+    my $prefixes = shift;
+    my ($a, $b) = @$prefixes;
+
+    my $a_is_prefix_of_b = ($a eq substr($b, 0, length $a));
+    my $b_is_prefix_of_a = ($b eq substr($a, 0, length $b));
+
+    return ($a_is_prefix_of_b || $b_is_prefix_of_a);
+}
+
+sub pick2paths {
+    # Pick 2 out of a list of paths so we can compute the common path prefix.
+    my $paths = shift;
+
+    return unless scalar(@$paths) > 1;
+
+    @$paths = sort @$paths;
+
+    # Return the first and the last path for reasons of diversity. This gives
+    # us the highest chance of getting the prefix right.
+    my @picks = ($$paths[0], $$paths[-1]);
+
+    debug(sprintf "/// SOR: %s", Dumper(\...@picks));
+    return \...@picks;
+}
+
+sub compute_representative_path_prefix {
+    # Given a set of paths, find the prefixes of a "representative" pair of
+    # paths. If the set consists of a sole path just return it.
+    my $paths = shift;
+
+    if (scalar(@$paths) == 1) {
+        return $$paths[0];
+    }
+
+    my $picks = pick2paths($paths);
+
+    return if !defined($picks);
+
+    my ($a, $b) = @$picks;
+
+    my @as = split(//, $a);
+    my @bs = split(//, $b);
+    my @zippedchars = zip @as, @bs;
+    my $counter = 0;
+    my $last_slash_seen_at = -1;
+    my ($achar, $bchar);
+    local *next_pair_of_chars = sub { @zippedchars[2*$counter..2*$counter+1] };
+    local *check_counter = sub { (defined($achar) && ($achar eq '/')) || (defined($bchar) && ($bchar eq '/')) };
+
+    while (($achar, $bchar) = next_pair_of_chars()) {
+        $last_slash_seen_at = $counter if check_counter();
+        last if !defined($achar) || !defined($bchar) || $achar ne $bchar;
+        $counter += 1;
+    }
+    ($achar, $bchar) = next_pair_of_chars();
+    $last_slash_seen_at = $counter if check_counter();
+
+    # Make sure the prefix ends on path segment boundary.
+    my $result;
+    if ($last_slash_seen_at == -1) {
+        $result = '';
+    }
+    else {
+        $result = substr($a, 0, $counter);
+    }
+    $result =~ s,/*$,,;
+    debug(sprintf "... RPP: %s", Dumper($result));
+
+    return $result;
+}
+
+sub normalizepaths {
+    # This procedure
+    #
+    #   - figures out what kind of path prefixes the files
+    #     in the manifest and in the local tree use respectively
+    #   - strips off these prefixes as needed
+    #
+    # so it becomes possible to find out which files in the manifest
+    # are *not* in the local tree.
+
+    # References to the lists that hold the local tree and manifest paths
+    # respectively.
+    my $paths_in_tree = shift;
+    my $manifest_entries = shift;
+    my $prefixes = shift;
+
+    # Any path prefixes found? An empty list indicates a bug or error.
+    my @pathsets = ($paths_in_tree, $manifest_entries);
+    my %pdata = zip @$prefixes, @pathsets;
+    while (my ($prefix, $paths) = each(%pdata)) {
+        # Do we need to strip off prefixes for this set of paths?
+        if (length($prefix) > 0) {
+            # Escape regex meta chars in path prefixes (example:
+            # aptoncd-0.1.98+bzr112)
+            $prefix =~ s![+?*]!\\$&!g;
+            # Pre-compile regex for prefix.
+            my $pre = qr(^$prefix/?);
+            map { $_ = canonpath($_); s,$pre,,; $_ } @$paths;
+        }
+    }
+
+    return;
+}
+
+1;
+

=== modified file 'pristine-tar'
--- pristine-tar	2009-04-14 21:23:22 +0000
+++ pristine-tar	2009-10-14 07:36:17 +0000
@@ -8,7 +8,7 @@
 
 B<pristine-tar> [-vdk] gentar delta tarball
 
-B<pristine-tar> [-vdk] gendelta tarball delta
+B<pristine-tar> [-vdk] [-l localtree] gendelta tarball delta
 
 B<pristine-tar> [-vdk] [-m message] commit tarball [upstream]
 
@@ -39,6 +39,10 @@
 
 If the delta filename is "-", it is written to standard output.
 
+If the optional "localtree" argument is passed gendelta will check whether
+the files required are all present in the local tree and abort if this is
+not the case.
+
 =item pristine-tar gentar 
 
 This takes the specified delta file, and the files in the current
@@ -122,12 +126,17 @@
 
 use warnings;
 use strict;
+use File::Basename;
+use File::Path;
 use File::Temp;
-use File::Path;
-use File::Basename;
 use Getopt::Long;
 use Cwd qw{getcwd abs_path};
 
+use FindBin;
+use lib $FindBin::Bin;
+
+use Ptutils;
+
 # magic identification
 use constant GZIP_ID1            => 0x1F;
 use constant GZIP_ID2            => 0x8B;
@@ -144,6 +153,7 @@
 my $debug=0;
 my $keep=0;
 my $message;
+my $localtree;
 	
 # Force locale to C since tar may output utf-8 filenames differently
 # depending on the locale.
@@ -151,7 +161,7 @@
 
 sub usage {
 	print STDERR "Usage: pristine-tar [-vdk] gentar delta tarball\n";
-	print STDERR "       pristine-tar [-vdk] gendelta tarball delta\n";
+	print STDERR "       pristine-tar [-vdk] [-l localtree] gendelta tarball delta\n";
 	print STDERR "       pristine-tar [-vdk] [-m message] commit tarball [upstream]\n";
 	print STDERR "       pristine-tar [-vdk] checkout tarball\n";
 	exit 1;
@@ -195,13 +205,17 @@
 	my $source=shift;
 	my %optio...@_;
 	
-	my @manifest;
-	open (IN, "$tempdir/manifest") || die "$tempdir/manifest: $!";
-	while (<IN>) {
-		chomp;
-		push @manifest, $_;
-	}
-	close IN;
+    my @manifest;
+    open (IN, "$tempdir/manifest") || die "$tempdir/manifest: $!";
+    while (<IN>) {
+        chomp;
+        # Unicode code points (e.g. \201) are read as text i.e. as four
+        # characters and *not* as a single byte. We convert them to bytes
+        # here.
+        s/\\(\d{3})/"chr(0$1)"/eeg;
+        push @manifest, $_;
+    }
+    close IN;
 
 	# The manifest and source should have the same filenames,
 	# but the manifest probably has all the files under a common
@@ -399,6 +413,20 @@
 
 	my $tempdir=tempdir();
 	
+    genmanifest($tarball, "$tempdir/manifest");
+
+    if (defined $localtree) {
+        # Check whether all paths in the manifest are also present in the
+        # local tree.
+        my $missing_in_tree = Ptutils::checkmanifest("$tempdir/manifest", $localtree);
+
+        if ($#$missing_in_tree >= 0) {
+            # Abort here since we don't have all the files required for
+            # generating a pristine tar in the local tree.
+            error("Files missing in local tree: @$missing_in_tree");
+        }
+    }
+
 	my $stdout=0;
 	if ($delta eq "-") {
 		$stdout=1;
@@ -449,7 +477,6 @@
 		$tarball="$tempdir/origtarball";
 	}
 
-	genmanifest($tarball, "$tempdir/manifest");
 	my $recreatetarball;
 	if (! exists $opts{recreatetarball}) {
 		my $sourcedir="$tempdir/tmp";
@@ -746,6 +773,7 @@
 
 Getopt::Long::Configure("bundling");
 if (! GetOptions(
+	"l|localtree=s" => \$localtree,
 	"m|message=s" => \$message,
 	"v|verbose!" => \$verbose,
 	"d|debug!" => \$debug,

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to