This is an automated email from the git hooks/post-receive script. agwa-guest pushed a commit to branch master in repository strip-nondeterminism.
commit ad9d61af15035e719d7a0223afcaa5fa25f0c815 Author: Andrew Ayer <a...@andrewayer.name> Date: Sun Sep 7 17:40:37 2014 -0700 Completely reorganize and overhaul strip-nondeterminism is now a standalone command, and handlers are in Perl modules. debhelper will depend on strip-nondeterminism. --- .gitignore | 10 ++ MANIFEST | 11 ++ MANIFEST.SKIP | 21 +++ Makefile.PL | 20 +++ TODO | 6 +- bin/dh_strip_nondeterminism | 85 +++++++++++ bin/strip-nondeterminism | 110 ++++++++++++++ dh_strip_nondeterminism | 248 ------------------------------- handlers/README | 3 - handlers/gzip | 104 ------------- handlers/zip | 12 -- lib/StripNondeterminism.pm | 76 ++++++++++ lib/StripNondeterminism/handlers/ar.pm | 86 +++++++++++ lib/StripNondeterminism/handlers/gzip.pm | 122 +++++++++++++++ lib/StripNondeterminism/handlers/jar.pm | 43 ++++++ lib/StripNondeterminism/handlers/zip.pm | 45 ++++++ 16 files changed, 632 insertions(+), 370 deletions(-) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61bb845 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Makemaker generated files and dirs. +/Makefile +/blib +/MakeMaker-* +/pm_to_blib +/MYMETA.yml + +# Temp and old files. +*~ +*.old diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..c0c361b --- /dev/null +++ b/MANIFEST @@ -0,0 +1,11 @@ +COPYING +MANIFEST This list of files +Makefile.PL +README +bin/dh_strip_nondeterminism +bin/strip-nondeterminism +lib/StripNondeterminism.pm +lib/StripNondeterminism/handlers/ar.pm +lib/StripNondeterminism/handlers/gzip.pm +lib/StripNondeterminism/handlers/jar.pm +lib/StripNondeterminism/handlers/zip.pm diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP new file mode 100644 index 0000000..93dfd07 --- /dev/null +++ b/MANIFEST.SKIP @@ -0,0 +1,21 @@ +# Version control files and dirs. +\B\.git\b + +# Makemaker generated files and dirs. +^MANIFEST\. +^Makefile$ +^Makefile.old$ +^blib/ +^MakeMaker-\d +^pm_to_blib + +# Temp, old and emacs backup files. +~$ +\.old$ +\.swp$ +^#.*#$ +^\.# + +# Odds and ends +\bLEFTOFF$ +\bTODO$ diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..affb564 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,20 @@ +use strict; +use warnings; +use ExtUtils::MakeMaker; + +my $MMVER = eval $ExtUtils::MakeMaker::VERSION; + +WriteMakefile( + NAME => 'strip-nondeterminism', + AUTHOR => 'Andrew Ayer <a...@andrewayer.name>', + VERSION_FROM => 'lib/StripNondeterminism.pm', + ABSTRACT => 'tool for stripping non-determinism from files', + EXE_FILES => [qw(bin/dh_strip_nondeterminism bin/strip-nondeterminism)], + PREREQ_PM => { + 'Archive::Zip' => 0, + 'Getopt::Long' => 0, + }, + LICENSE => "gpl", + dist => { COMPRESS => 'gzip -9nf', SUFFIX => 'gz', }, + clean => { FILES => 'StripNondeterminism-*' }, +); diff --git a/TODO b/TODO index 275cb94..8020fc4 100644 --- a/TODO +++ b/TODO @@ -1,10 +1,10 @@ Write handlers for: - gzip + [DONE] gzip javadocs - ar + [DONE] ar ... -Write main strip-nondeterminism command +[DONE] Write main strip-nondeterminism command It would just parse command line arguments and then execute the correct handler Write dh_strip_nondeterminism diff --git a/bin/dh_strip_nondeterminism b/bin/dh_strip_nondeterminism new file mode 100755 index 0000000..4adbf93 --- /dev/null +++ b/bin/dh_strip_nondeterminism @@ -0,0 +1,85 @@ +#!/usr/bin/perl -w + +=head1 NAME + +dh_strip_nondeterminism - strip uninteresting, non-deterministic information from files + +=cut + +use strict; +use File::Find; +use Debian::Debhelper::Dh_Lib; +use StripNondeterminism; + +=head1 SYNOPSIS + +B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>] + +=head1 DESCRIPTION + +B<dh_strip_nondeterminism> is a debhelper program that is responsible +for stripping uninteresting, non-deterministic information, such as +timestamps, from compiled files so that the build is reproducible. + +This program examines your package build directories and works out what +to strip on its own. It uses L<file(1)> and filenames to figure out what +files should have non-determinism stripped from them. In general it +seems to make very good guesses, and will do the right thing in almost +all cases. + +=head1 OPTIONS + +=over 4 + +=item B<-X>I<item>, B<--exclude=>I<item> + +Exclude files that contain I<item> anywhere in their filename from being +stripped. You may use this option multiple times to build up a list of +things to exclude. + +=back + +=cut + +init(); + +my @nondeterministic_files; +sub testfile { + return if -l $_ or -d $_; # Skip directories and symlinks always. + + # See if we were asked to exclude this file. + # Note that we have to test on the full filename, including directory. + my $fn="$File::Find::dir/$_"; + foreach my $f (@{$dh{EXCLUDE}}) { + return if ($fn=~m/\Q$f\E/); + } + + my $normalizer = StripNondeterminism::get_normalizer_for_file($_); + if ($normalizer) { + push @nondeterministic_files, [$fn, $normalizer]; + } +} + +foreach my $package (@{$dh{DOPACKAGES}}) { + my $tmp=tmpdir($package); + + @nondeterministic_files=(); + find(\&testfile,$tmp); + + foreach (@nondeterministic_files) { + my ($path, $normalize) = @$_; + $normalize->($path); + } +} + +=head1 SEE ALSO + +L<debhelper(7)> + +This program is a part of debhelper. + +=head1 AUTHOR + +Andrew Ayer <a...@andrewayer.name> + +=cut diff --git a/bin/strip-nondeterminism b/bin/strip-nondeterminism new file mode 100755 index 0000000..38c68ca --- /dev/null +++ b/bin/strip-nondeterminism @@ -0,0 +1,110 @@ +#!/usr/bin/perl + +# +# Copyright 2014 Andrew Ayer +# +# This file is part of strip-nondeterminism. +# +# strip-nondeterminism is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# strip-nondeterminism is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>. +# + +use strict; +use warnings; + +use StripNondeterminism; +use Getopt::Long; +Getopt::Long::Configure(qw(no_ignore_case permute bundling)); + +my $cmd = $0; +$cmd =~ s/.*\///; +my $usage = "Usage: $cmd [-t|--type FILETYPE] FILENAME\n"; + +my @opspec = ( + 'type|t=s', 'help|h', 'version|V', +); +my $glop = {}; +GetOptions($glop, @opspec) || die $usage; + +if ($glop->{help}) { + print $usage; + exit 0; +} + +if ($glop->{version}) { + print "$cmd version $StripNondeterminism::VERSION\n"; + exit 0; +} + +die $usage if @ARGV == 0; + +for my $filename (@ARGV) { + my $normalizer; + if ($glop->{type}) { + $normalizer = StripNondeterminism::get_normalizer_by_name($glop->{type}); + die $glop->{type} . ": Unknown file type\n" unless $normalizer; + } else { + $normalizer = StripNondeterminism::get_normalizer_for_file($filename); + next unless $normalizer; + } + + $normalizer->($filename); +} + +__END__ + +=head1 NAME + +strip-nondeterminism - strip non-deterministic information from files + +=head1 SYNOPSIS + + strip-nondeterminism [-t filetype] filename ... + +=head1 DESCRIPTION + +B<strip-nondeterminism> is a tool to strip bits of non-deterministic +information, such as timestamps, from files. It can be used as +a post-processing step to make a build reproducible, when the build +process itself cannot be made deterministic. + +=head1 OPTIONS + +=over 4 + +=item B<-t> I<filetype>, B<--type> I<filetype> + +Use the normalizer for the given file type (ar, gzip, jar, zip). If this +option is not specified, the file type is detected automatically based on +the file name extension. + +=item B<-h>, B<--help> + +Display this help message. + +=item B<-V>, B<--version> + +Print only the version string and then quit. + +=back + +=head1 AUTHOR + +Andrew Ayer + +=head1 COPYRIGHT + +strip-nondeterminism is free software. You can redistribute it and/or +modify it under the terms of the GNU General Public License, version 3. + +=cut diff --git a/dh_strip_nondeterminism b/dh_strip_nondeterminism deleted file mode 100755 index 501b1a4..0000000 --- a/dh_strip_nondeterminism +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/perl -w - -=head1 NAME - -dh_strip_nondeterminism - strip uninteresting, non-deterministic information from files - -=cut - -use strict; -use File::Find; -use Debian::Debhelper::Dh_Lib; - -=head1 SYNOPSIS - -B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>] - -=head1 DESCRIPTION - -B<dh_strip_nondeterminism> is a debhelper program that is responsible -for stripping uninteresting, non-deterministic information, such as -timestamps, from compiled files so that the build is reproducible. - -This program examines your package build directories and works out what -to strip on its own. It uses L<file(1)> and filenames to figure out what -files should have non-determinism stripped from them. In general it -seems to make very good guesses, and will do the right thing in almost -all cases. - -=head1 OPTIONS - -=over 4 - -=item B<-X>I<item>, B<--exclude=>I<item> - -Exclude files that contain I<item> anywhere in their filename from being -stripped. You may use this option multiple times to build up a list of -things to exclude. - -=back - -=cut - -init(); - -# I could just use `file $_[0]`, but this is safer -sub get_file_type { - my $file=shift; - open (FILE, '-|') # handle all filenames safely - || exec('file', $file) - || die "can't exec file: $!"; - my $type=<FILE>; - close FILE; - return $type; -} - -my @nondeterministic_files; -sub testfile { - return if -l $_ or -d $_; # Skip directories and symlinks always. - - # See if we were asked to exclude this file. - # Note that we have to test on the full filename, including directory. - my $fn="$File::Find::dir/$_"; - foreach my $f (@{$dh{EXCLUDE}}) { - return if ($fn=~m/\Q$f\E/); - } - - # gzip - if (m/\.gz$/) { - # No need for get_file_type b/c the gzip normalizer - # silently ignores non-gzip files - push @nondeterministic_files, [$fn, \&handlers::gzip::normalize]; - } - # zip - if (m/\.zip$/ && get_file_type($_) =~ m/Zip archive data/) { - push @nondeterministic_files, [$fn, \&handlers::zip::normalize]; - } - # jar - if (m/\.jar$/ && get_file_type($_) =~ m/Zip archive data/) { - push @nondeterministic_files, [$fn, \&handlers::jar::normalize]; - } -} - -foreach my $package (@{$dh{DOPACKAGES}}) { - my $tmp=tmpdir($package); - - @nondeterministic_files=(); - find(\&testfile,$tmp); - - foreach (@nondeterministic_files) { - my ($path, $normalize) = @$_; - $normalize->($path); - } -} - -package handlers::gzip; - -use Debian::Debhelper::Dh_Lib; -use File::Temp qw/tempfile/; - -use constant { - FTEXT => 1 << 0, - FHCRC => 1 << 1, - FEXTRA => 1 << 2, - FNAME => 1 << 3, - FCOMMENT => 1 << 4, -}; - -sub normalize { - my ($filename) = @_; - - open(my $fh, '<', $filename) or error "Unable to open $filename for reading: $!"; - my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), UNLINK => 1); - - # See RFC 1952 - - # 0 1 2 3 4 5 6 7 8 9 10 - # +---+---+---+---+---+---+---+---+---+---+ - # |ID1|ID2|CM |FLG| MTIME |XFL|OS | - # +---+---+---+---+---+---+---+---+---+---+ - - # Read the current header - my $hdr; - my $bytes_read = read($fh, $hdr, 10); - return unless $bytes_read == 10; - my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr); - return unless $id1 == 31 and $id2 == 139; - - my $new_flg = $flg; - $new_flg &= ~FNAME; # Don't include filename - $new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it) - $mtime = 0; # Zero out mtime (this is what `gzip -n` does) - # TODO: question: normalize some of the other fields, such as OS? - - # Write a new header - print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os); - - if ($flg & FEXTRA) { # Copy through - # 0 1 2 - # +---+---+=================================+ - # | XLEN |...XLEN bytes of "extra field"...| - # +---+---+=================================+ - my $buf; - read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip file"; - my ($xlen) = unpack('v', $buf); - read($fh, $buf, $xlen) == $xlen or error "$filename: Malformed gzip file"; - print $out_fh pack('vA*', $xlen, $buf); - } - if ($flg & FNAME) { # Read but do not copy through - # 0 - # +=========================================+ - # |...original file name, zero-terminated...| - # +=========================================+ - while (1) { - my $buf; - read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file"; - last if ord($buf) == 0; - } - } - if ($flg & FCOMMENT) { # Copy through - # 0 - # +===================================+ - # |...file comment, zero-terminated...| - # +===================================+ - while (1) { - my $buf; - read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file"; - print $out_fh $buf; - last if ord($buf) == 0; - } - } - if ($flg & FHCRC) { # Read but do not copy through - # 0 1 2 - # +---+---+ - # | CRC16 | - # +---+---+ - my $buf; - read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip file"; - } - - # Copy through the rest of the file. - # TODO: also normalize concatenated gzip files. This will require reading and understanding - # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere. - while (1) { - my $buf; - my $bytes_read = read($fh, $buf, 4096); - defined($bytes_read) or error "$filename: read failed: $!"; - print $out_fh $buf; - last if $bytes_read == 0; - } - - chmod((stat($fh))[2] & 07777, $out_filename); - rename($out_filename, $filename) or error "$filename: unable to overwrite: rename: $!"; -} - -package handlers::zip; - -use Debian::Debhelper::Dh_Lib; -use Archive::Zip; - -Archive::Zip::setErrorHandler(\&error); - -# A magic number from Archive::Zip for the earliest timestamp that -# can be represented by a Zip file. From the Archive::Zip source: -# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1 -# minute so that nothing timezoney can muck us up." -use constant SAFE_EPOCH => 315576060; - -sub normalize { - my ($zip_filename, $filename_cmp) = @_; - $filename_cmp ||= sub { $a cmp $b }; - my $zip = Archive::Zip->new($zip_filename); - my @filenames = sort $filename_cmp $zip->memberNames(); - for my $filename (@filenames) { - my $member = $zip->removeMember($filename); - $zip->addMember($member); - $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH); - } - $zip->overwrite(); -} - -package handlers::jar; - -sub _jar_filename_cmp { - # META-INF/ and META-INF/MANIFEST.MF are expected to be the first entries in the Zip archive. - return 0 if $a eq $b; - for (qw{META-INF/ META-INF/MANIFEST.MF}) { - return -1 if $a eq $_; - return 1 if $b eq $_; - } - return $a cmp $b; -} - -sub normalize { - my ($jar_filename) = @_; - handlers::zip::normalize($jar_filename, \&_jar_filename_cmp); -} - -=head1 SEE ALSO - -L<debhelper(7)> - -This program is a part of debhelper. - -=head1 AUTHOR - -Andrew Ayer <a...@andrewayer.name> - -=cut diff --git a/handlers/README b/handlers/README deleted file mode 100644 index be83133..0000000 --- a/handlers/README +++ /dev/null @@ -1,3 +0,0 @@ -A handler reads a file of a particular format from stdin, strips the -non-determinism from it, and writes the result to stdout. A handler -can be written in any language. diff --git a/handlers/gzip b/handlers/gzip deleted file mode 100755 index 79134b1..0000000 --- a/handlers/gzip +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/perl - -# Copyright 2014 Andrew Ayer -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -use strict; -use warnings; - -use constant { - FTEXT => 1 << 0, - FHCRC => 1 << 1, - FEXTRA => 1 << 2, - FNAME => 1 << 3, - FCOMMENT => 1 << 4, -}; - -# See RFC 1952 - -# 0 1 2 3 4 5 6 7 8 9 10 -# +---+---+---+---+---+---+---+---+---+---+ -# |ID1|ID2|CM |FLG| MTIME |XFL|OS | -# +---+---+---+---+---+---+---+---+---+---+ - -# Read the current header -my $hdr; -my $bytes_read = read(*STDIN, $hdr, 10); -die "Not a gzip file" unless $bytes_read == 10; -my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr); -die "Not a gzip file" unless $id1 == 31 and $id2 == 139; - -my $new_flg = $flg; -$new_flg &= ~FNAME; # Don't include filename -$new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it) -$mtime = 0; # Zero out mtime (this is what `gzip -n` does) -# TODO: question: normalize some of the other fields, such as OS? - -# Write a new header -print pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os); - -if ($flg & FEXTRA) { # Copy through - # 0 1 2 - # +---+---+=================================+ - # | XLEN |...XLEN bytes of "extra field"...| - # +---+---+=================================+ - my $buf; - read(*STDIN, $buf, 2) == 2 or die "Malformed gzip file"; - my ($xlen) = unpack('v', $buf); - read(*STDIN, $buf, $xlen) == $xlen or die "Malformed gzip file"; - print pack('vA*', $xlen, $buf); -} -if ($flg & FNAME) { # Read but do not copy through - # 0 - # +=========================================+ - # |...original file name, zero-terminated...| - # +=========================================+ - while (1) { - my $buf; - read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file"; - last if ord($buf) == 0; - } -} -if ($flg & FCOMMENT) { # Copy through - # 0 - # +===================================+ - # |...file comment, zero-terminated...| - # +===================================+ - while (1) { - my $buf; - read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file"; - print $buf; - last if ord($buf) == 0; - } -} -if ($flg & FHCRC) { # Read but do not copy through - # 0 1 2 - # +---+---+ - # | CRC16 | - # +---+---+ - my $buf; - read(*STDIN, $buf, 2) == 2 or die "Not a gzip file"; -} - -# Copy through the rest of the file. -# TODO: support concatenated gzip files. This will require reading and understanding -# each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere. -while (1) { - my $buf; - my $bytes_read = read(*STDIN, $buf, 4096); - defined($bytes_read) or die "read failed: $!"; - print $buf; - last if $bytes_read == 0; -} diff --git a/handlers/zip b/handlers/zip deleted file mode 100755 index 7a1dba0..0000000 --- a/handlers/zip +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/python - -from zipfile import ZipFile -import sys - -with ZipFile(sys.stdout, 'w') as outzip: - with ZipFile(sys.stdin, 'r') as inzip: - for info in sorted(inzip.infolist(), key=lambda info: info.filename): - # 1980-01-01 is the earliest date that the zip format supports - info.date_time = (1980, 1, 1, 0, 0, 0) - content = inzip.read(info.filename) - outzip.writestr(info, content) diff --git a/lib/StripNondeterminism.pm b/lib/StripNondeterminism.pm new file mode 100644 index 0000000..b01d673 --- /dev/null +++ b/lib/StripNondeterminism.pm @@ -0,0 +1,76 @@ +# +# Copyright 2014 Andrew Ayer +# +# This file is part of strip-nondeterminism. +# +# strip-nondeterminism is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# strip-nondeterminism is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>. +# +package StripNondeterminism; + +use strict; +use warnings; + +use StripNondeterminism::handlers::ar; +use StripNondeterminism::handlers::gzip; +use StripNondeterminism::handlers::jar; +use StripNondeterminism::handlers::zip; + +our($VERSION); + +$VERSION = '0.001'; # 0.001 + +sub _get_file_type { + my $file=shift; + open (FILE, '-|') # handle all filenames safely + || exec('file', $file) + || die "can't exec file: $!"; + my $type=<FILE>; + close FILE; + return $type; +} + +sub get_normalizer_for_file { + $_ = shift; + + return undef if -d $_; # Skip directories + + # ar + if (m/\.a$/ && _get_file_type($_) =~ m/ar archive/) { + return \&StripNondeterminism::handlers::ar::normalize; + } + # gzip + if (m/\.gz$/ && _get_file_type($_) =~ m/gzip compressed data/) { + return \&StripNondeterminism::handlers::gzip::normalize; + } + # jar + if (m/\.jar$/ && _get_file_type($_) =~ m/Zip archive data/) { + return \&StripNondeterminism::handlers::jar::normalize; + } + # zip + if (m/\.zip$/ && _get_file_type($_) =~ m/Zip archive data/) { + return \&StripNondeterminism::handlers::zip::normalize; + } + return undef; +} + +sub get_normalizer_by_name { + $_ = shift; + return \&StripNondeterminism::handlers::ar::normalize if $_ eq 'ar'; + return \&StripNondeterminism::handlers::gzip::normalize if $_ eq 'gzip'; + return \&StripNondeterminism::handlers::jar::normalize if $_ eq 'jar'; + return \&StripNondeterminism::handlers::zip::normalize if $_ eq 'zip'; + return undef; +} + +1; diff --git a/lib/StripNondeterminism/handlers/ar.pm b/lib/StripNondeterminism/handlers/ar.pm new file mode 100644 index 0000000..02d1b99 --- /dev/null +++ b/lib/StripNondeterminism/handlers/ar.pm @@ -0,0 +1,86 @@ +# Copyright © 2014 Jérémy Bobbio <lu...@debian.org> +# Copyright © 2014 Niko Tyni <nt...@debian.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Some code borrowed from ArFile +# Copyright (C) 2007 Stefano Zacchiroli <z...@debian.org> +# Copyright (C) 2007 Filippo Giunchedi <fili...@debian.org> + +package StripNondeterminism::handlers::ar; + +use strict; +use warnings; + +use Fcntl q/SEEK_SET/; + +sub normalize { + my ($file) = @_; + + my $GLOBAL_HEADER = "!<arch>\n"; + my $GLOBAL_HEADER_LENGTH = length $GLOBAL_HEADER; + + my $FILE_HEADER_LENGTH = 60; + my $FILE_MAGIC = "`\n"; + + my $buf; + + open(my $fh, '+<', $file) + or die("failed to open $file for read+write: $!"); + + read $fh, $buf, $GLOBAL_HEADER_LENGTH; + return if $buf ne $GLOBAL_HEADER; + + while (1) { + my $file_header_start = tell $fh; + my $count = read $fh, $buf, $FILE_HEADER_LENGTH; + die "reading $file failed: $!" if !defined $count; + last if $count == 0; + + # http://en.wikipedia.org/wiki/Ar_(Unix) + #from to Name Format + #0 15 File name ASCII + #16 27 File modification date Decimal + #28 33 Owner ID Decimal + #34 39 Group ID Decimal + #40 47 File mode Octal + #48 57 File size in bytes Decimal + #58 59 File magic \140\012 + + # FIXME: is this correct? + last if $count == 1 and eof($fh) and $buf eq "\n"; + + die "Incorrect header length" + if length $buf != $FILE_HEADER_LENGTH; + die "Incorrect file magic" + if substr($buf, 58, length($FILE_MAGIC)) ne $FILE_MAGIC; + + my $file_size = substr($buf, 48, 10); + seek $fh, $file_header_start + 16, SEEK_SET; + + # mtime + syswrite $fh, sprintf("%-12d", 0); + # owner + syswrite $fh, sprintf("%-6d", 0); + # group + syswrite $fh, sprintf("%-6d", 0); + # file mode + syswrite $fh, sprintf("%-8o", 0644); + + # move to next member + seek $fh, $file_header_start + $FILE_HEADER_LENGTH + $file_size, SEEK_SET; + } +} + +1; diff --git a/lib/StripNondeterminism/handlers/gzip.pm b/lib/StripNondeterminism/handlers/gzip.pm new file mode 100644 index 0000000..daa82d2 --- /dev/null +++ b/lib/StripNondeterminism/handlers/gzip.pm @@ -0,0 +1,122 @@ +# +# Copyright 2014 Andrew Ayer +# +# This file is part of strip-nondeterminism. +# +# strip-nondeterminism is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# strip-nondeterminism is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>. +# +package StripNondeterminism::handlers::gzip; + +use strict; +use warnings; + +use File::Temp qw/tempfile/; +use File::Basename; + +use constant { + FTEXT => 1 << 0, + FHCRC => 1 << 1, + FEXTRA => 1 << 2, + FNAME => 1 << 3, + FCOMMENT => 1 << 4, +}; + +sub normalize { + my ($filename) = @_; + + open(my $fh, '<', $filename) or die "Unable to open $filename for reading: $!"; + my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), UNLINK => 1); + + # See RFC 1952 + + # 0 1 2 3 4 5 6 7 8 9 10 + # +---+---+---+---+---+---+---+---+---+---+ + # |ID1|ID2|CM |FLG| MTIME |XFL|OS | + # +---+---+---+---+---+---+---+---+---+---+ + + # Read the current header + my $hdr; + my $bytes_read = read($fh, $hdr, 10); + return unless $bytes_read == 10; + my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr); + return unless $id1 == 31 and $id2 == 139; + + my $new_flg = $flg; + $new_flg &= ~FNAME; # Don't include filename + $new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it) + $mtime = 0; # Zero out mtime (this is what `gzip -n` does) + # TODO: question: normalize some of the other fields, such as OS? + + # Write a new header + print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os); + + if ($flg & FEXTRA) { # Copy through + # 0 1 2 + # +---+---+=================================+ + # | XLEN |...XLEN bytes of "extra field"...| + # +---+---+=================================+ + my $buf; + read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file"; + my ($xlen) = unpack('v', $buf); + read($fh, $buf, $xlen) == $xlen or die "$filename: Malformed gzip file"; + print $out_fh pack('vA*', $xlen, $buf); + } + if ($flg & FNAME) { # Read but do not copy through + # 0 + # +=========================================+ + # |...original file name, zero-terminated...| + # +=========================================+ + while (1) { + my $buf; + read($fh, $buf, 1) == 1 or die "$filename: Malformed gzip file"; + last if ord($buf) == 0; + } + } + if ($flg & FCOMMENT) { # Copy through + # 0 + # +===================================+ + # |...file comment, zero-terminated...| + # +===================================+ + while (1) { + my $buf; + read($fh, $buf, 1) == 1 or die "$filename: Malformed gzip file"; + print $out_fh $buf; + last if ord($buf) == 0; + } + } + if ($flg & FHCRC) { # Read but do not copy through + # 0 1 2 + # +---+---+ + # | CRC16 | + # +---+---+ + my $buf; + read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file"; + } + + # Copy through the rest of the file. + # TODO: also normalize concatenated gzip files. This will require reading and understanding + # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere. + while (1) { + my $buf; + my $bytes_read = read($fh, $buf, 4096); + defined($bytes_read) or die "$filename: read failed: $!"; + print $out_fh $buf; + last if $bytes_read == 0; + } + + chmod((stat($fh))[2] & 07777, $out_filename); + rename($out_filename, $filename) or die "$filename: unable to overwrite: rename: $!"; +} + +1; diff --git a/lib/StripNondeterminism/handlers/jar.pm b/lib/StripNondeterminism/handlers/jar.pm new file mode 100644 index 0000000..6be7071 --- /dev/null +++ b/lib/StripNondeterminism/handlers/jar.pm @@ -0,0 +1,43 @@ +# +# Copyright 2014 Andrew Ayer +# +# This file is part of strip-nondeterminism. +# +# strip-nondeterminism is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# strip-nondeterminism is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>. +# +package StripNondeterminism::handlers::jar; + +use strict; +use warnings; + +use Archive::Zip; +use StripNondeterminism::handlers::zip; + +sub _jar_filename_cmp ($$) { + my ($a, $b) = @_; + # META-INF/ and META-INF/MANIFEST.MF are expected to be the first entries in the Zip archive. + return 0 if $a eq $b; + for (qw{META-INF/ META-INF/MANIFEST.MF}) { + return -1 if $a eq $_; + return 1 if $b eq $_; + } + return $a cmp $b; +} + +sub normalize { + my ($jar_filename) = @_; + StripNondeterminism::handlers::zip::normalize($jar_filename, \&_jar_filename_cmp); +} + +1; diff --git a/lib/StripNondeterminism/handlers/zip.pm b/lib/StripNondeterminism/handlers/zip.pm new file mode 100644 index 0000000..e19c7fc --- /dev/null +++ b/lib/StripNondeterminism/handlers/zip.pm @@ -0,0 +1,45 @@ +# +# Copyright 2014 Andrew Ayer +# +# This file is part of strip-nondeterminism. +# +# strip-nondeterminism is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# strip-nondeterminism is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>. +# +package StripNondeterminism::handlers::zip; + +use strict; +use warnings; + +use Archive::Zip; + +# A magic number from Archive::Zip for the earliest timestamp that +# can be represented by a Zip file. From the Archive::Zip source: +# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1 +# minute so that nothing timezoney can muck us up." +use constant SAFE_EPOCH => 315576060; + +sub normalize { + my ($zip_filename, $filename_cmp) = @_; + $filename_cmp ||= sub { $a cmp $b }; + my $zip = Archive::Zip->new($zip_filename); + my @filenames = sort $filename_cmp $zip->memberNames(); + for my $filename (@filenames) { + my $member = $zip->removeMember($filename); + $zip->addMember($member); + $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH); + } + $zip->overwrite(); +} + +1; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/strip-nondeterminism.git _______________________________________________ Reproducible-builds mailing list Reproducible-builds@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds