On 19/07/16 00:28, Florian Pritz wrote:
> Signed-off-by: Florian Pritz <[email protected]>

A commit message would be nice...

Is there any reason PGP signatures are not checked?

> ---
>  contrib/doc/verify-pacman-repo-db.1.txt |  50 ++++++
>  contrib/verify-pacman-repo-db.pl        | 279 
> ++++++++++++++++++++++++++++++++
>  2 files changed, 329 insertions(+)
>  create mode 100644 contrib/doc/verify-pacman-repo-db.1.txt
>  create mode 100755 contrib/verify-pacman-repo-db.pl
> 
> diff --git a/contrib/doc/verify-pacman-repo-db.1.txt 
> b/contrib/doc/verify-pacman-repo-db.1.txt
> new file mode 100644
> index 0000000..01eafbe
> --- /dev/null
> +++ b/contrib/doc/verify-pacman-repo-db.1.txt
> @@ -0,0 +1,50 @@
> +/////
> +vim:set ts=4 sw=4 syntax=asciidoc noet spell spelllang=en_us:
> +/////
> +verify-pacman-repo-db(1)
> +=======================
> +
> +Name
> +----
> +verify-pacman-repo-db - package repository verification utility
> +
> +
> +Synopsis
> +--------
> +'verify-pacman-repo-db' [options] <database file> ...
> +
> +
> +Description
> +-----------
> +'verify-pacman-repo-db' looks at a pacman repo database and verifies its
> +content against the actual package files. The database is expected to be in
> +the same directory as the packages (or symlinks to the packages).
> +
> +The following properties are verified for each package in the database:
> +
> +      - existence of the package file
> +      - file size
> +      - MD5 and SHA256 checksum (--checksum)
> +
> +Options
> +-------
> +*-h, \--help*::
> +     Output a short help message.
> +
> +*\--debug*::
> +     Enable debug output.
> +
> +*-c, \--checksum*::
> +     Verify checksums of packages. Note that this means all package files 
> will
> +     be read from disk. Otherwise only metadata is compared, which does not
> +     require reading package file contents.
> +
> +*-t, \--threads* <number>::
> +     Use 'number' threads to verify packages. Note that each thread may use 
> up
> +     to approximately 128MiB of memory. Default: 1
> +
> +See Also
> +--------
> +linkman:repo-add[8]
> +
> +include::footer.txt[]
> diff --git a/contrib/verify-pacman-repo-db.pl 
> b/contrib/verify-pacman-repo-db.pl
> new file mode 100755
> index 0000000..71cfddf
> --- /dev/null
> +++ b/contrib/verify-pacman-repo-db.pl
> @@ -0,0 +1,279 @@
> +#!/usr/bin/perl -T
> +use warnings;
> +use strict;
> +
> +=pod
> +
> +=head1 SYNOPSIS
> +
> +verify-pacman-repo-db.pl [options] <database file> ...
> +
> + Options:
> +   --help, -h         Show short help message
> +   --debug            Enable debug output
> +   --checksum, -c     Verify checksums of packages
> +   --threads, -t <n>  Use n threads to verify packages. Default: 1
> +                      NOTE: Each thread uses up to approx. 128MiB of memory
> +
> +=head1 DESCRIPTION
> +
> +verify-pacman-repo-db.pl looks at a pacman repo database and verifies its 
> content
> +against the actual package files. The database is expected to be in the same
> +directory as the packages (or symlinks to the packages).
> +
> +The following properties are verified for each package in the database:
> +
> + - existence of the package file
> + - file size
> + - MD5 and SHA256 checksum (--checksum)
> +
> +=head1 NOTES
> +
> +This script intentionally does not use any ALPM libraries. The format is 
> simple
> +enough to be parsed and this way we might just detect more problems because 
> the
> +libalpm parsing code might also have bugs. We also stay much more portable
> +which might be good for people that want to check a db, but don't actually 
> have
> +pacman installed.
> +
> +=cut
> +
> +package main;
> +use Getopt::Long;
> +use Pod::Usage;
> +
> +exit main();
> +
> +sub main {
> +     my %opts = (
> +             threads => 1,
> +     );
> +
> +     Getopt::Long::Configure ("bundling");
> +     pod2usage(-verbose => 0) if (@ARGV== 0);
> +     GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or 
> pod2usage(2);
> +     pod2usage(0) if $opts{help};
> +
> +     my $verifier = Verifier->new(\%opts);
> +
> +     for my $repodb (@ARGV) {
> +             $verifier->check_repodb($repodb);
> +     }
> +
> +     $verifier->finalize();
> +     return $verifier->get_error_status();
> +}
> +
> +package Verifier;
> +use Archive::Tar;
> +use Digest::MD5;
> +use Digest::SHA;
> +use File::Basename;
> +use threads;
> +use threads::shared;
> +use Thread::Queue;
> +
> +sub new {
> +     my $class = shift;
> +     my $opts = shift;
> +
> +     my $self :shared = shared_clone({
> +             opts => \%{$opts},
> +             package_queue => Thread::Queue->new(),
> +             output_queue => Thread::Queue->new(),
> +             workers => [],
> +             errors => 0,
> +     });
> +
> +     bless $self, $class;
> +     $self->start_workers();
> +     return $self;
> +}
> +
> +sub start_workers {
> +     my $self = shift;
> +
> +     threads->new(\&_worker_output_queue, $self);
> +
> +     for (my $i = 0; $i < $self->{opts}->{threads}; $i++) {
> +             my $thr :shared = 
> shared_clone(threads->new(\&_worker_package_queue, $self));
> +             push @{$self->{workers}}, $thr;
> +     }
> +}
> +
> +sub _worker_package_queue {
> +     my $self = shift;
> +     while (my $workpack = $self->{package_queue}->dequeue()) {
> +             my $dbdata = 
> $self->_parse_db_entry($workpack->{db_desc_content});
> +             $self->{errors} += 
> $self->_verify_db_entry($workpack->{dirname}, $dbdata);
> +     }
> +}
> +
> +sub _worker_output_queue {
> +     my $self = shift;
> +     while (my $output = $self->{output_queue}->dequeue()) {
> +             print STDERR $output;
> +     }
> +}
> +
> +sub finalize {
> +     my $self = shift;
> +
> +     $self->{package_queue}->end();
> +     $self->_join_threads($self->{workers});
> +
> +     $self->{output_queue}->end();
> +     $self->_join_threads([threads->list]);
> +}
> +
> +sub _join_threads {
> +     my $self = shift;
> +     my $threads = shift;
> +
> +     for my $thr (@{$threads}) {
> +             if ($thr->tid && !threads::equal($thr, threads->self)) {
> +                     print "waiting for thread ".$thr->tid()." to finish\n" 
> if $self->{opts}->{debug};
> +                     $thr->join;
> +             }
> +     }
> +}
> +
> +sub get_error_status {
> +     my $self = shift;
> +
> +     return $self->{errors} > 0;
> +}
> +
> +sub check_repodb {
> +     my $self = shift;
> +     my $repodb = shift;
> +
> +     my $db = Archive::Tar->new();
> +     $db->read($repodb);
> +
> +     my $dirname = dirname($repodb);
> +     my $pkgcount = 0;
> +
> +     my @files = $db->list_files();
> +     for my $file_object ($db->get_files()) {
> +             if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
> +                     my $package = $1;
> +                     $self->{package_queue}->enqueue({
> +                                     package => $package,
> +                                     db_desc_content => 
> $file_object->get_content(),
> +                                     dirname => $dirname,
> +                             });
> +                     $pkgcount++;
> +             }
> +     }
> +
> +     $self->_debug(sprintf("Queued %d package from database '%s'\n", 
> $pkgcount, $repodb));
> +}
> +
> +sub _parse_db_entry {
> +     my $self = shift;
> +     my $content = shift;
> +     my %db;
> +     my $key;
> +
> +     for my $line (split /\n/, $content) {
> +             if ($line eq '') {
> +                     $key = undef;
> +                     next;
> +             }
> +             if ($line =~ m/^%(.+)%$/) {
> +                     $key = $1;
> +             } else {
> +                     push @{$db{$key}}, $line;
> +                     die "\$key not set. Is the db formated incorrectly?" 
> unless $key;
> +             }
> +     }
> +     return \%db;
> +}
> +
> +sub _output {
> +     my $self = shift;
> +     my $output = shift;
> +
> +     return if $output eq "";
> +
> +     $output = sprintf("Thread %s: %s", threads->self->tid(), $output);
> +     $self->{output_queue}->enqueue($output);
> +}
> +
> +sub _debug {
> +     my $self = shift;
> +     my $output = shift;
> +     $self->_output($output) if $self->{opts}->{debug};
> +}
> +
> +sub _verify_db_entry {
> +     my $self = shift;
> +     my $basedir = shift;
> +     my $dbdata = shift;
> +     my $ret = 0;
> +     my $output = "";
> +
> +     # verify package exists
> +     my $pkgfile = $basedir.'/'.$dbdata->{FILENAME}[0];
> +     $self->_debug(sprintf("Checking package %s\n", $dbdata->{FILENAME}[0]));
> +     unless (-e $pkgfile) {
> +             $self->_output(sprintf("Package file missing: %s\n", $pkgfile));
> +             return 1;
> +     }
> +
> +     $ret += $self->_verify_package_size($dbdata, $pkgfile);
> +     $ret += $self->_verify_package_checksum($dbdata, $pkgfile) if 
> $self->{opts}->{checksum};
> +     # TODO verify gpg sigs?
> +
> +     return $ret;
> +}
> +
> +sub _verify_package_size {
> +     my $self = shift;
> +     my $dbdata = shift;
> +     my $pkgfile = shift;
> +
> +     my $csize = $dbdata->{CSIZE}[0];
> +     my $filesize = (stat($pkgfile))[7];
> +     unless ($csize == $filesize) {
> +             $self->_output(sprintf("Package file has incorrect size: %d vs 
> %d: %s\n", $csize, $filesize, $pkgfile));
> +             return 1;
> +     }
> +     return 0;
> +}
> +
> +sub _verify_package_checksum {
> +     my $self = shift;
> +     my $dbdata = shift;
> +     my $pkgfile = shift;
> +
> +     my $md5 = Digest::MD5->new;
> +     my $sha = Digest::SHA->new(256);
> +
> +     my $content;
> +     # 128MiB to keep random IO low when using multiple threads (only works 
> for large packages though)
> +     my $chunksize = 1024*1024*128;
> +     open my $fh, "<", $pkgfile;
> +     while (read($fh, $content, $chunksize)) {
> +             $md5->add($content);
> +             $sha->add($content);
> +     }
> +
> +     my $expected_sha = $dbdata->{SHA256SUM}[0];
> +     my $expected_md5 = $dbdata->{MD5SUM}[0];
> +     my $got_md5 = $md5->hexdigest;
> +     my $got_sha = $sha->hexdigest;
> +
> +     unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
> +             my $output;
> +             $output .= sprintf "Package file has incorrect checksum: %s\n", 
> $pkgfile;
> +             $output .= sprintf "expected: SHA %s\n", $expected_sha;
> +             $output .= sprintf "got:      SHA %s\n", $got_sha;
> +             $output .= sprintf "expected: MD5 %s\n", $expected_md5;
> +             $output .= sprintf "got:      MD5 %s\n", $got_md5;
> +             $self->_output($output);
> +             return 1;
> +     }
> +     return 0;
> +}
> +
> 

Reply via email to