This is an automated email from the git hooks/post-receive script.

agwa-guest pushed a commit to branch master
in repository strip-nondeterminism.

commit ad9d61af15035e719d7a0223afcaa5fa25f0c815
Author: Andrew Ayer <a...@andrewayer.name>
Date:   Sun Sep 7 17:40:37 2014 -0700

    Completely reorganize and overhaul
    
    strip-nondeterminism is now a standalone command, and handlers are in
    Perl modules.  debhelper will depend on strip-nondeterminism.
---
 .gitignore                               |  10 ++
 MANIFEST                                 |  11 ++
 MANIFEST.SKIP                            |  21 +++
 Makefile.PL                              |  20 +++
 TODO                                     |   6 +-
 bin/dh_strip_nondeterminism              |  85 +++++++++++
 bin/strip-nondeterminism                 | 110 ++++++++++++++
 dh_strip_nondeterminism                  | 248 -------------------------------
 handlers/README                          |   3 -
 handlers/gzip                            | 104 -------------
 handlers/zip                             |  12 --
 lib/StripNondeterminism.pm               |  76 ++++++++++
 lib/StripNondeterminism/handlers/ar.pm   |  86 +++++++++++
 lib/StripNondeterminism/handlers/gzip.pm | 122 +++++++++++++++
 lib/StripNondeterminism/handlers/jar.pm  |  43 ++++++
 lib/StripNondeterminism/handlers/zip.pm  |  45 ++++++
 16 files changed, 632 insertions(+), 370 deletions(-)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..61bb845
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Makemaker generated files and dirs.
+/Makefile
+/blib
+/MakeMaker-*
+/pm_to_blib
+/MYMETA.yml
+
+# Temp and old files.
+*~
+*.old
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..c0c361b
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,11 @@
+COPYING
+MANIFEST                       This list of files
+Makefile.PL
+README
+bin/dh_strip_nondeterminism
+bin/strip-nondeterminism
+lib/StripNondeterminism.pm
+lib/StripNondeterminism/handlers/ar.pm
+lib/StripNondeterminism/handlers/gzip.pm
+lib/StripNondeterminism/handlers/jar.pm
+lib/StripNondeterminism/handlers/zip.pm
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..93dfd07
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,21 @@
+# Version control files and dirs.
+\B\.git\b
+
+# Makemaker generated files and dirs.
+^MANIFEST\.
+^Makefile$
+^Makefile.old$
+^blib/
+^MakeMaker-\d
+^pm_to_blib
+
+# Temp, old and emacs backup files.
+~$
+\.old$
+\.swp$
+^#.*#$
+^\.#
+
+# Odds and ends
+\bLEFTOFF$
+\bTODO$
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..affb564
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,20 @@
+use strict;
+use warnings;
+use ExtUtils::MakeMaker;
+
+my $MMVER = eval $ExtUtils::MakeMaker::VERSION;
+
+WriteMakefile(
+       NAME            => 'strip-nondeterminism',
+       AUTHOR          => 'Andrew Ayer <a...@andrewayer.name>',
+       VERSION_FROM    => 'lib/StripNondeterminism.pm',
+       ABSTRACT        => 'tool for stripping non-determinism from files',
+       EXE_FILES       => [qw(bin/dh_strip_nondeterminism 
bin/strip-nondeterminism)],
+       PREREQ_PM => {
+               'Archive::Zip'  => 0,
+               'Getopt::Long'  => 0,
+       },
+       LICENSE         => "gpl",
+       dist            => { COMPRESS => 'gzip -9nf', SUFFIX => 'gz', },
+       clean           => { FILES => 'StripNondeterminism-*' },
+);
diff --git a/TODO b/TODO
index 275cb94..8020fc4 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,10 @@
 Write handlers for:
-       gzip
+       [DONE] gzip
        javadocs
-       ar
+       [DONE] ar
        ...
 
-Write main strip-nondeterminism command
+[DONE] Write main strip-nondeterminism command
        It would just parse command line arguments and then execute the correct 
handler
 
 Write dh_strip_nondeterminism
diff --git a/bin/dh_strip_nondeterminism b/bin/dh_strip_nondeterminism
new file mode 100755
index 0000000..4adbf93
--- /dev/null
+++ b/bin/dh_strip_nondeterminism
@@ -0,0 +1,85 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+dh_strip_nondeterminism - strip uninteresting, non-deterministic information 
from files
+
+=cut
+
+use strict;
+use File::Find;
+use Debian::Debhelper::Dh_Lib;
+use StripNondeterminism;
+
+=head1 SYNOPSIS
+
+B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>]
+
+=head1 DESCRIPTION
+
+B<dh_strip_nondeterminism> is a debhelper program that is responsible
+for stripping uninteresting, non-deterministic information, such as
+timestamps, from compiled files so that the build is reproducible.
+
+This program examines your package build directories and works out what
+to strip on its own. It uses L<file(1)> and filenames to figure out what
+files should have non-determinism stripped from them.  In general it
+seems to make very good guesses, and will do the right thing in almost
+all cases.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-X>I<item>, B<--exclude=>I<item>
+
+Exclude files that contain I<item> anywhere in their filename from being
+stripped. You may use this option multiple times to build up a list of
+things to exclude.
+
+=back
+
+=cut
+
+init();
+
+my @nondeterministic_files;
+sub testfile {
+       return if -l $_ or -d $_; # Skip directories and symlinks always.
+
+       # See if we were asked to exclude this file.
+       # Note that we have to test on the full filename, including directory.
+       my $fn="$File::Find::dir/$_";
+       foreach my $f (@{$dh{EXCLUDE}}) {
+               return if ($fn=~m/\Q$f\E/);
+       }
+
+       my $normalizer = StripNondeterminism::get_normalizer_for_file($_);
+       if ($normalizer) {
+               push @nondeterministic_files, [$fn, $normalizer];
+       }
+}
+
+foreach my $package (@{$dh{DOPACKAGES}}) {
+       my $tmp=tmpdir($package);
+
+       @nondeterministic_files=();
+       find(\&testfile,$tmp);
+
+       foreach (@nondeterministic_files) {
+               my ($path, $normalize) = @$_;
+               $normalize->($path);
+       }
+}
+
+=head1 SEE ALSO
+
+L<debhelper(7)>
+
+This program is a part of debhelper.
+
+=head1 AUTHOR
+
+Andrew Ayer <a...@andrewayer.name>
+
+=cut
diff --git a/bin/strip-nondeterminism b/bin/strip-nondeterminism
new file mode 100755
index 0000000..38c68ca
--- /dev/null
+++ b/bin/strip-nondeterminism
@@ -0,0 +1,110 @@
+#!/usr/bin/perl
+
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+
+use StripNondeterminism;
+use Getopt::Long;
+Getopt::Long::Configure(qw(no_ignore_case permute bundling));
+
+my $cmd = $0;
+$cmd =~ s/.*\///;
+my $usage = "Usage: $cmd [-t|--type FILETYPE] FILENAME\n";
+
+my @opspec = (
+       'type|t=s', 'help|h', 'version|V',
+);
+my $glop = {};
+GetOptions($glop, @opspec) || die $usage;
+
+if ($glop->{help}) {
+       print $usage;
+       exit 0;
+}
+
+if ($glop->{version}) {
+       print "$cmd version $StripNondeterminism::VERSION\n";
+       exit 0;
+}
+
+die $usage if @ARGV == 0;
+
+for my $filename (@ARGV) {
+       my $normalizer;
+       if ($glop->{type}) {
+               $normalizer = 
StripNondeterminism::get_normalizer_by_name($glop->{type});
+               die $glop->{type} . ": Unknown file type\n" unless $normalizer;
+       } else {
+               $normalizer = 
StripNondeterminism::get_normalizer_for_file($filename);
+               next unless $normalizer;
+       }
+
+       $normalizer->($filename);
+}
+
+__END__
+
+=head1 NAME
+
+strip-nondeterminism - strip non-deterministic information from files
+
+=head1 SYNOPSIS
+
+ strip-nondeterminism [-t filetype] filename ...
+
+=head1 DESCRIPTION
+
+B<strip-nondeterminism> is a tool to strip bits of non-deterministic
+information, such as timestamps, from files.  It can be used as
+a post-processing step to make a build reproducible, when the build
+process itself cannot be made deterministic.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-t> I<filetype>, B<--type> I<filetype>
+
+Use the normalizer for the given file type (ar, gzip, jar, zip).  If this
+option is not specified, the file type is detected automatically based on
+the file name extension.
+
+=item B<-h>, B<--help>
+
+Display this help message.
+
+=item B<-V>, B<--version>
+
+Print only the version string and then quit.
+
+=back
+
+=head1 AUTHOR
+
+Andrew Ayer
+
+=head1 COPYRIGHT
+
+strip-nondeterminism is free software.  You can redistribute it and/or
+modify it under the terms of the GNU General Public License, version 3.
+
+=cut
diff --git a/dh_strip_nondeterminism b/dh_strip_nondeterminism
deleted file mode 100755
index 501b1a4..0000000
--- a/dh_strip_nondeterminism
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/perl -w
-
-=head1 NAME
-
-dh_strip_nondeterminism - strip uninteresting, non-deterministic information 
from files
-
-=cut
-
-use strict;
-use File::Find;
-use Debian::Debhelper::Dh_Lib;
-
-=head1 SYNOPSIS
-
-B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>]
-
-=head1 DESCRIPTION
-
-B<dh_strip_nondeterminism> is a debhelper program that is responsible
-for stripping uninteresting, non-deterministic information, such as
-timestamps, from compiled files so that the build is reproducible.
-
-This program examines your package build directories and works out what
-to strip on its own. It uses L<file(1)> and filenames to figure out what
-files should have non-determinism stripped from them.  In general it
-seems to make very good guesses, and will do the right thing in almost
-all cases.
-
-=head1 OPTIONS
-
-=over 4
-
-=item B<-X>I<item>, B<--exclude=>I<item>
-
-Exclude files that contain I<item> anywhere in their filename from being
-stripped. You may use this option multiple times to build up a list of
-things to exclude.
-
-=back
-
-=cut
-
-init();
-
-# I could just use `file $_[0]`, but this is safer
-sub get_file_type {
-       my $file=shift;
-       open (FILE, '-|') # handle all filenames safely
-               || exec('file', $file)
-               || die "can't exec file: $!";
-       my $type=<FILE>;
-       close FILE;
-       return $type;
-}
-
-my @nondeterministic_files;
-sub testfile {
-       return if -l $_ or -d $_; # Skip directories and symlinks always.
-
-       # See if we were asked to exclude this file.
-       # Note that we have to test on the full filename, including directory.
-       my $fn="$File::Find::dir/$_";
-       foreach my $f (@{$dh{EXCLUDE}}) {
-               return if ($fn=~m/\Q$f\E/);
-       }
-
-       # gzip
-       if (m/\.gz$/) {
-               # No need for get_file_type b/c the gzip normalizer
-               # silently ignores non-gzip files
-               push @nondeterministic_files, [$fn, 
\&handlers::gzip::normalize];
-       }
-       # zip
-       if (m/\.zip$/ && get_file_type($_) =~ m/Zip archive data/) {
-               push @nondeterministic_files, [$fn, \&handlers::zip::normalize];
-       }
-       # jar
-       if (m/\.jar$/ && get_file_type($_) =~ m/Zip archive data/) {
-               push @nondeterministic_files, [$fn, \&handlers::jar::normalize];
-       }
-}
-
-foreach my $package (@{$dh{DOPACKAGES}}) {
-       my $tmp=tmpdir($package);
-
-       @nondeterministic_files=();
-       find(\&testfile,$tmp);
-
-       foreach (@nondeterministic_files) {
-               my ($path, $normalize) = @$_;
-               $normalize->($path);
-       }
-}
-
-package handlers::gzip;
-
-use Debian::Debhelper::Dh_Lib;
-use File::Temp qw/tempfile/;
-
-use constant {
-       FTEXT    => 1 << 0,
-       FHCRC    => 1 << 1,
-       FEXTRA   => 1 << 2,
-       FNAME    => 1 << 3,
-       FCOMMENT => 1 << 4,
-};
-
-sub normalize {
-       my ($filename) = @_;
-
-       open(my $fh, '<', $filename) or error "Unable to open $filename for 
reading: $!";
-       my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), 
UNLINK => 1);
-
-       # See RFC 1952
-
-       # 0   1   2   3   4   5   6   7   8   9   10
-       # +---+---+---+---+---+---+---+---+---+---+
-       # |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
-       # +---+---+---+---+---+---+---+---+---+---+
-
-       # Read the current header
-       my $hdr;
-       my $bytes_read = read($fh, $hdr, 10);
-       return unless $bytes_read == 10;
-       my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', 
$hdr);
-       return unless $id1 == 31 and $id2 == 139;
-
-       my $new_flg = $flg;
-       $new_flg &= ~FNAME;     # Don't include filename
-       $new_flg &= ~FHCRC;     # Don't include header CRC (not all 
implementations support it)
-       $mtime = 0;             # Zero out mtime (this is what `gzip -n` does)
-       # TODO: question: normalize some of the other fields, such as OS?
-
-       # Write a new header
-       print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, 
$os);
-
-       if ($flg & FEXTRA) {    # Copy through
-               # 0   1   2
-               # +---+---+=================================+
-               # | XLEN  |...XLEN bytes of "extra field"...|
-               # +---+---+=================================+
-               my $buf;
-               read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip 
file";
-               my ($xlen) = unpack('v', $buf);
-               read($fh, $buf, $xlen) == $xlen or error "$filename: Malformed 
gzip file";
-               print $out_fh pack('vA*', $xlen, $buf);
-       }
-       if ($flg & FNAME) {     # Read but do not copy through
-               # 0
-               # +=========================================+
-               # |...original file name, zero-terminated...|
-               # +=========================================+
-               while (1) {
-                       my $buf;
-                       read($fh, $buf, 1) == 1 or error "$filename: Malformed 
gzip file";
-                       last if ord($buf) == 0;
-               }
-       }
-       if ($flg & FCOMMENT) {  # Copy through
-               # 0
-               # +===================================+
-               # |...file comment, zero-terminated...|
-               # +===================================+
-               while (1) {
-                       my $buf;
-                       read($fh, $buf, 1) == 1 or error "$filename: Malformed 
gzip file";
-                       print $out_fh $buf;
-                       last if ord($buf) == 0;
-               }
-       }
-       if ($flg & FHCRC) {     # Read but do not copy through
-               # 0   1   2
-               # +---+---+
-               # | CRC16 |
-               # +---+---+
-               my $buf;
-               read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip 
file";
-       }
-
-       # Copy through the rest of the file.
-       # TODO: also normalize concatenated gzip files.  This will require 
reading and understanding
-       # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths 
anywhere.
-       while (1) {
-               my $buf;
-               my $bytes_read = read($fh, $buf, 4096);
-               defined($bytes_read) or error "$filename: read failed: $!";
-               print $out_fh $buf;
-               last if $bytes_read == 0;
-       }
-
-       chmod((stat($fh))[2] & 07777, $out_filename);
-       rename($out_filename, $filename) or error "$filename: unable to 
overwrite: rename: $!";
-}
-
-package handlers::zip;
-
-use Debian::Debhelper::Dh_Lib;
-use Archive::Zip;
-
-Archive::Zip::setErrorHandler(\&error);
-
-# A magic number from Archive::Zip for the earliest timestamp that
-# can be represented by a Zip file.  From the Archive::Zip source:
-# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1
-# minute so that nothing timezoney can muck us up."
-use constant SAFE_EPOCH => 315576060;
-
-sub normalize {
-       my ($zip_filename, $filename_cmp) = @_;
-       $filename_cmp ||= sub { $a cmp $b };
-       my $zip = Archive::Zip->new($zip_filename);
-       my @filenames = sort $filename_cmp $zip->memberNames();
-       for my $filename (@filenames) {
-               my $member = $zip->removeMember($filename);
-               $zip->addMember($member);
-               $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH);
-       }
-       $zip->overwrite();
-}
-
-package handlers::jar;
-
-sub _jar_filename_cmp {
-       # META-INF/ and META-INF/MANIFEST.MF are expected to be the first 
entries in the Zip archive.
-       return 0 if $a eq $b;
-       for (qw{META-INF/ META-INF/MANIFEST.MF}) {
-               return -1 if $a eq $_;
-               return  1 if $b eq $_;
-       }
-       return $a cmp $b;
-}
-
-sub normalize {
-       my ($jar_filename) = @_;
-       handlers::zip::normalize($jar_filename, \&_jar_filename_cmp);
-}
-
-=head1 SEE ALSO
-
-L<debhelper(7)>
-
-This program is a part of debhelper.
-
-=head1 AUTHOR
-
-Andrew Ayer <a...@andrewayer.name>
-
-=cut
diff --git a/handlers/README b/handlers/README
deleted file mode 100644
index be83133..0000000
--- a/handlers/README
+++ /dev/null
@@ -1,3 +0,0 @@
-A handler reads a file of a particular format from stdin, strips the
-non-determinism from it, and writes the result to stdout.  A handler
-can be written in any language.
diff --git a/handlers/gzip b/handlers/gzip
deleted file mode 100755
index 79134b1..0000000
--- a/handlers/gzip
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2014 Andrew Ayer
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-use constant {
-       FTEXT    => 1 << 0,
-       FHCRC    => 1 << 1,
-       FEXTRA   => 1 << 2,
-       FNAME    => 1 << 3,
-       FCOMMENT => 1 << 4,
-};
-
-# See RFC 1952
-
-# 0   1   2   3   4   5   6   7   8   9   10
-# +---+---+---+---+---+---+---+---+---+---+
-# |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
-# +---+---+---+---+---+---+---+---+---+---+
-
-# Read the current header
-my $hdr;
-my $bytes_read = read(*STDIN, $hdr, 10);
-die "Not a gzip file" unless $bytes_read == 10;
-my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr);
-die "Not a gzip file" unless $id1 == 31 and $id2 == 139;
-
-my $new_flg = $flg;
-$new_flg &= ~FNAME;    # Don't include filename
-$new_flg &= ~FHCRC;    # Don't include header CRC (not all implementations 
support it)
-$mtime = 0;            # Zero out mtime (this is what `gzip -n` does)
-# TODO: question: normalize some of the other fields, such as OS?
-
-# Write a new header
-print pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
-
-if ($flg & FEXTRA) {   # Copy through
-       # 0   1   2
-       # +---+---+=================================+
-       # | XLEN  |...XLEN bytes of "extra field"...|
-       # +---+---+=================================+
-       my $buf;
-       read(*STDIN, $buf, 2) == 2 or die "Malformed gzip file";
-       my ($xlen) = unpack('v', $buf);
-       read(*STDIN, $buf, $xlen) == $xlen or die "Malformed gzip file";
-       print pack('vA*', $xlen, $buf);
-}
-if ($flg & FNAME) {    # Read but do not copy through
-       # 0
-       # +=========================================+
-       # |...original file name, zero-terminated...|
-       # +=========================================+
-       while (1) {
-               my $buf;
-               read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file";
-               last if ord($buf) == 0;
-       }
-}
-if ($flg & FCOMMENT) { # Copy through
-       # 0
-       # +===================================+
-       # |...file comment, zero-terminated...|
-       # +===================================+
-       while (1) {
-               my $buf;
-               read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file";
-               print $buf;
-               last if ord($buf) == 0;
-       }
-}
-if ($flg & FHCRC) {    # Read but do not copy through
-       # 0   1   2
-       # +---+---+
-       # | CRC16 |
-       # +---+---+
-       my $buf;
-       read(*STDIN, $buf, 2) == 2 or die "Not a gzip file";
-}
-
-# Copy through the rest of the file.
-# TODO: support concatenated gzip files.  This will require reading and 
understanding
-# each DEFLATE block (see RFC 1951), since gzip doesn't include lengths 
anywhere.
-while (1) {
-       my $buf;
-       my $bytes_read = read(*STDIN, $buf, 4096);
-       defined($bytes_read) or die "read failed: $!";
-       print $buf;
-       last if $bytes_read == 0;
-}
diff --git a/handlers/zip b/handlers/zip
deleted file mode 100755
index 7a1dba0..0000000
--- a/handlers/zip
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/python
-
-from zipfile import ZipFile
-import sys
-
-with ZipFile(sys.stdout, 'w') as outzip:
-  with ZipFile(sys.stdin, 'r') as inzip:
-    for info in sorted(inzip.infolist(), key=lambda info: info.filename):
-      # 1980-01-01 is the earliest date that the zip format supports
-      info.date_time = (1980, 1, 1, 0, 0, 0)
-      content = inzip.read(info.filename)
-      outzip.writestr(info, content)
diff --git a/lib/StripNondeterminism.pm b/lib/StripNondeterminism.pm
new file mode 100644
index 0000000..b01d673
--- /dev/null
+++ b/lib/StripNondeterminism.pm
@@ -0,0 +1,76 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism;
+
+use strict;
+use warnings;
+
+use StripNondeterminism::handlers::ar;
+use StripNondeterminism::handlers::gzip;
+use StripNondeterminism::handlers::jar;
+use StripNondeterminism::handlers::zip;
+
+our($VERSION);
+
+$VERSION = '0.001'; # 0.001
+
+sub _get_file_type {
+       my $file=shift;
+       open (FILE, '-|') # handle all filenames safely
+               || exec('file', $file)
+               || die "can't exec file: $!";
+       my $type=<FILE>;
+       close FILE;
+       return $type;
+}
+
+sub get_normalizer_for_file {
+       $_ = shift;
+
+       return undef if -d $_; # Skip directories
+
+       # ar
+       if (m/\.a$/ && _get_file_type($_) =~ m/ar archive/) {
+               return \&StripNondeterminism::handlers::ar::normalize;
+       }
+       # gzip
+       if (m/\.gz$/ && _get_file_type($_) =~ m/gzip compressed data/) {
+               return \&StripNondeterminism::handlers::gzip::normalize;
+       }
+       # jar
+       if (m/\.jar$/ && _get_file_type($_) =~ m/Zip archive data/) {
+               return \&StripNondeterminism::handlers::jar::normalize;
+       }
+       # zip
+       if (m/\.zip$/ && _get_file_type($_) =~ m/Zip archive data/) {
+               return \&StripNondeterminism::handlers::zip::normalize;
+       }
+       return undef;
+}
+
+sub get_normalizer_by_name {
+       $_ = shift;
+       return \&StripNondeterminism::handlers::ar::normalize if $_ eq 'ar';
+       return \&StripNondeterminism::handlers::gzip::normalize if $_ eq 'gzip';
+       return \&StripNondeterminism::handlers::jar::normalize if $_ eq 'jar';
+       return \&StripNondeterminism::handlers::zip::normalize if $_ eq 'zip';
+       return undef;
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/ar.pm 
b/lib/StripNondeterminism/handlers/ar.pm
new file mode 100644
index 0000000..02d1b99
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/ar.pm
@@ -0,0 +1,86 @@
+# Copyright © 2014 Jérémy Bobbio <lu...@debian.org>
+# Copyright © 2014 Niko Tyni <nt...@debian.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Some code borrowed from ArFile
+# Copyright (C) 2007    Stefano Zacchiroli  <z...@debian.org>
+# Copyright (C) 2007    Filippo Giunchedi   <fili...@debian.org>
+
+package StripNondeterminism::handlers::ar;
+
+use strict;
+use warnings;
+
+use Fcntl q/SEEK_SET/;
+
+sub normalize {
+       my ($file) = @_;
+
+       my $GLOBAL_HEADER = "!<arch>\n";
+       my $GLOBAL_HEADER_LENGTH = length $GLOBAL_HEADER;
+
+       my $FILE_HEADER_LENGTH = 60;
+       my $FILE_MAGIC = "`\n";
+
+       my $buf;
+
+       open(my $fh, '+<', $file)
+           or die("failed to open $file for read+write: $!");
+
+       read $fh, $buf, $GLOBAL_HEADER_LENGTH;
+       return if $buf ne $GLOBAL_HEADER;
+
+       while (1) {
+               my $file_header_start = tell $fh;
+               my $count = read $fh, $buf, $FILE_HEADER_LENGTH;
+               die "reading $file failed: $!" if !defined $count;
+               last if $count == 0;
+
+               # http://en.wikipedia.org/wiki/Ar_(Unix)    
+               #from   to     Name                      Format
+               #0      15     File name                 ASCII
+               #16     27     File modification date    Decimal
+               #28     33     Owner ID                  Decimal
+               #34     39     Group ID                  Decimal
+               #40     47     File mode                 Octal
+               #48     57     File size in bytes        Decimal
+               #58     59     File magic                \140\012
+
+               # FIXME: is this correct?
+               last if $count == 1 and eof($fh) and $buf eq "\n";
+
+               die "Incorrect header length"
+               if length $buf != $FILE_HEADER_LENGTH;
+               die "Incorrect file magic"
+               if substr($buf, 58, length($FILE_MAGIC)) ne $FILE_MAGIC;
+
+               my $file_size = substr($buf, 48, 10);
+               seek $fh, $file_header_start + 16, SEEK_SET;
+
+               # mtime
+               syswrite $fh, sprintf("%-12d", 0);
+               # owner
+               syswrite $fh, sprintf("%-6d", 0);
+               # group
+               syswrite $fh, sprintf("%-6d", 0);
+               # file mode
+               syswrite $fh, sprintf("%-8o", 0644);
+
+               # move to next member
+               seek $fh, $file_header_start + $FILE_HEADER_LENGTH + 
$file_size, SEEK_SET;
+       }
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/gzip.pm 
b/lib/StripNondeterminism/handlers/gzip.pm
new file mode 100644
index 0000000..daa82d2
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/gzip.pm
@@ -0,0 +1,122 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::gzip;
+
+use strict;
+use warnings;
+
+use File::Temp qw/tempfile/;
+use File::Basename;
+
+use constant {
+       FTEXT    => 1 << 0,
+       FHCRC    => 1 << 1,
+       FEXTRA   => 1 << 2,
+       FNAME    => 1 << 3,
+       FCOMMENT => 1 << 4,
+};
+
+sub normalize {
+       my ($filename) = @_;
+
+       open(my $fh, '<', $filename) or die "Unable to open $filename for 
reading: $!";
+       my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), 
UNLINK => 1);
+
+       # See RFC 1952
+
+       # 0   1   2   3   4   5   6   7   8   9   10
+       # +---+---+---+---+---+---+---+---+---+---+
+       # |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
+       # +---+---+---+---+---+---+---+---+---+---+
+
+       # Read the current header
+       my $hdr;
+       my $bytes_read = read($fh, $hdr, 10);
+       return unless $bytes_read == 10;
+       my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', 
$hdr);
+       return unless $id1 == 31 and $id2 == 139;
+
+       my $new_flg = $flg;
+       $new_flg &= ~FNAME;     # Don't include filename
+       $new_flg &= ~FHCRC;     # Don't include header CRC (not all 
implementations support it)
+       $mtime = 0;             # Zero out mtime (this is what `gzip -n` does)
+       # TODO: question: normalize some of the other fields, such as OS?
+
+       # Write a new header
+       print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, 
$os);
+
+       if ($flg & FEXTRA) {    # Copy through
+               # 0   1   2
+               # +---+---+=================================+
+               # | XLEN  |...XLEN bytes of "extra field"...|
+               # +---+---+=================================+
+               my $buf;
+               read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file";
+               my ($xlen) = unpack('v', $buf);
+               read($fh, $buf, $xlen) == $xlen or die "$filename: Malformed 
gzip file";
+               print $out_fh pack('vA*', $xlen, $buf);
+       }
+       if ($flg & FNAME) {     # Read but do not copy through
+               # 0
+               # +=========================================+
+               # |...original file name, zero-terminated...|
+               # +=========================================+
+               while (1) {
+                       my $buf;
+                       read($fh, $buf, 1) == 1 or die "$filename: Malformed 
gzip file";
+                       last if ord($buf) == 0;
+               }
+       }
+       if ($flg & FCOMMENT) {  # Copy through
+               # 0
+               # +===================================+
+               # |...file comment, zero-terminated...|
+               # +===================================+
+               while (1) {
+                       my $buf;
+                       read($fh, $buf, 1) == 1 or die "$filename: Malformed 
gzip file";
+                       print $out_fh $buf;
+                       last if ord($buf) == 0;
+               }
+       }
+       if ($flg & FHCRC) {     # Read but do not copy through
+               # 0   1   2
+               # +---+---+
+               # | CRC16 |
+               # +---+---+
+               my $buf;
+               read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file";
+       }
+
+       # Copy through the rest of the file.
+       # TODO: also normalize concatenated gzip files.  This will require 
reading and understanding
+       # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths 
anywhere.
+       while (1) {
+               my $buf;
+               my $bytes_read = read($fh, $buf, 4096);
+               defined($bytes_read) or die "$filename: read failed: $!";
+               print $out_fh $buf;
+               last if $bytes_read == 0;
+       }
+
+       chmod((stat($fh))[2] & 07777, $out_filename);
+       rename($out_filename, $filename) or die "$filename: unable to 
overwrite: rename: $!";
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/jar.pm 
b/lib/StripNondeterminism/handlers/jar.pm
new file mode 100644
index 0000000..6be7071
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/jar.pm
@@ -0,0 +1,43 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::jar;
+
+use strict;
+use warnings;
+
+use Archive::Zip;
+use StripNondeterminism::handlers::zip;
+
+sub _jar_filename_cmp ($$) {
+       my ($a, $b) = @_;
+       # META-INF/ and META-INF/MANIFEST.MF are expected to be the first 
entries in the Zip archive.
+       return 0 if $a eq $b;
+       for (qw{META-INF/ META-INF/MANIFEST.MF}) {
+               return -1 if $a eq $_;
+               return  1 if $b eq $_;
+       }
+       return $a cmp $b;
+}
+
+sub normalize {
+       my ($jar_filename) = @_;
+       StripNondeterminism::handlers::zip::normalize($jar_filename, 
\&_jar_filename_cmp);
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/zip.pm 
b/lib/StripNondeterminism/handlers/zip.pm
new file mode 100644
index 0000000..e19c7fc
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/zip.pm
@@ -0,0 +1,45 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::zip;
+
+use strict;
+use warnings;
+
+use Archive::Zip;
+
+# A magic number from Archive::Zip for the earliest timestamp that
+# can be represented by a Zip file.  From the Archive::Zip source:
+# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1
+# minute so that nothing timezoney can muck us up."
+use constant SAFE_EPOCH => 315576060;
+
+sub normalize {
+       my ($zip_filename, $filename_cmp) = @_;
+       $filename_cmp ||= sub { $a cmp $b };
+       my $zip = Archive::Zip->new($zip_filename);
+       my @filenames = sort $filename_cmp $zip->memberNames();
+       for my $filename (@filenames) {
+               my $member = $zip->removeMember($filename);
+               $zip->addMember($member);
+               $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH);
+       }
+       $zip->overwrite();
+}
+
+1;

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/reproducible/strip-nondeterminism.git

_______________________________________________
Reproducible-builds mailing list
Reproducible-builds@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds

Reply via email to