Signed-off-by: Florian Pritz <[email protected]>
---
 contrib/doc/verify-pacman-repo-db.1.txt |  50 ++++++
 contrib/verify-pacman-repo-db.pl        | 279 ++++++++++++++++++++++++++++++++
 2 files changed, 329 insertions(+)
 create mode 100644 contrib/doc/verify-pacman-repo-db.1.txt
 create mode 100755 contrib/verify-pacman-repo-db.pl

diff --git a/contrib/doc/verify-pacman-repo-db.1.txt b/contrib/doc/verify-pacman-repo-db.1.txt
new file mode 100644
index 0000000..01eafbe
--- /dev/null
+++ b/contrib/doc/verify-pacman-repo-db.1.txt
@@ -0,0 +1,50 @@
+/////
+vim:set ts=4 sw=4 syntax=asciidoc noet spell spelllang=en_us:
+/////
+verify-pacman-repo-db(1)
+=======================
+
+Name
+----
+verify-pacman-repo-db - package repository verification utility
+
+
+Synopsis
+--------
+'verify-pacman-repo-db' [options] <database file>...
+
+
+Description
+-----------
+'verify-pacman-repo-db' looks at a pacman repo database and verifies its
+content with the actual package files. The database is expected to be in
+the same directory as the packages (or symlinks to the packages).
+
+The following properties are verified for each package in the database:
+
+        - existence of the package file
+        - file size
+        - MD5 and SHA256 checksum (--checksum)
+
+Options
+-------
+*-h, \--help*::
+       Output a short help message.
+
+*\--debug*::
+       Enable debug output.
+
+*-c, \--checksum*::
+       Verify checksums of packages. Note that this means all package files will
+       be read from disk. Otherwise only metadata is compared, which does not
+       require reading package file contents.
+
+*-t, \--threads* <number>::
+       Use 'number' threads to verify packages. Note that each thread may use up
+       to approximately 128MiB of memory. Default: 1
+
+See Also
+--------
+linkman:repo-add[8]
+
+include::footer.txt[]
diff --git a/contrib/verify-pacman-repo-db.pl b/contrib/verify-pacman-repo-db.pl
new file mode 100755
index 0000000..71cfddf
--- /dev/null
+++ b/contrib/verify-pacman-repo-db.pl
@@ -0,0 +1,279 @@
+#!/usr/bin/perl -T
+use warnings;
+use strict;
+
+=pod
+
+=head1 SYNOPSIS
+
+verify-pacman-repo-db.pl [options] <database file> ...
+
+ Options:
+   --help, -h         Show short help message
+   --debug            Enable debug output
+   --checksum, -c     Verify checksums of packages
+   --threads, -t <n>  Use n threads to verify packages. Default: 1
+                      NOTE: Each thread uses up to approx. 128MiB of memory
+
+=head1 DESCRIPTION
+
+verify-pacman-repo-db.pl looks at a pacman repo database and verifies its content
+against the actual package files. The database is expected to be in the same
+directory as the packages (or symlinks to the packages).
+
+The following properties are verified for each package in the database:
+
+ - existence of the package file
+ - file size
+ - MD5 and SHA256 checksum (--checksum)
+
+=head1 NOTES
+
+This script intentionally does not use any ALPM libraries. The format is simple
+enough to be parsed, and this way we might detect more problems because the
+libalpm parsing code might also have bugs. We also stay much more portable,
+which is useful for people who want to check a db but do not actually have
+pacman installed.
+
+=cut
+
+package main;
+use Getopt::Long;
+use Pod::Usage;
+
+exit main();
+
+# Entry point of the script.
+#
+# Parses the command line, hands every remaining argument to the verifier as
+# a repo database path, waits for all worker threads and returns the
+# verifier's error status, which the caller passes to exit().
+sub main {
+       my %opts = (
+               threads => 1,
+       );
+
+       Getopt::Long::Configure ("bundling");
+       pod2usage(-verbose => 0) if (@ARGV == 0);
+       GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or pod2usage(2);
+       pod2usage(0) if $opts{help};
+
+       # Without at least one worker nothing would ever take packages off the
+       # queue, so the run would finish "successfully" without checking anything.
+       pod2usage(-message => "Number of threads must be at least 1", -exitval => 2) if $opts{threads} < 1;
+
+       # Options were given, but no database is left to verify.
+       pod2usage(-verbose => 0) if (@ARGV == 0);
+
+       my $verifier = Verifier->new(\%opts);
+
+       for my $repodb (@ARGV) {
+               $verifier->check_repodb($repodb);
+       }
+
+       $verifier->finalize();
+       return $verifier->get_error_status();
+}
+
+package Verifier;
+use Archive::Tar;
+use Digest::MD5;
+use Digest::SHA;
+use File::Basename;
+use threads;
+use threads::shared;
+use Thread::Queue;
+
+# Constructor.
+#
+# Takes the class name and a hash reference of options. Builds the shared
+# verifier state (options, package queue, output queue, worker list and
+# error counter), blesses it, starts the worker threads and returns the object.
+sub new {
+       my ($class, $opts) = @_;
+
+       my %state = (
+               opts => \%{$opts},
+               package_queue => Thread::Queue->new(),
+               output_queue => Thread::Queue->new(),
+               workers => [],
+               errors => 0,
+       );
+       my $self :shared = shared_clone(\%state);
+
+       bless $self, $class;
+       $self->start_workers();
+       return $self;
+}
+
+# Start the output thread and then as many package worker threads as the
+# "threads" option asks for. Only the package workers are recorded in
+# $self->{workers}.
+sub start_workers {
+       my ($self) = @_;
+
+       threads->new(\&_worker_output_queue, $self);
+
+       my $started = 0;
+       while ($started < $self->{opts}->{threads}) {
+               my $worker :shared = shared_clone(threads->new(\&_worker_package_queue, $self));
+               push @{$self->{workers}}, $worker;
+               $started++;
+       }
+}
+
+# Body of a package worker thread.
+#
+# Takes work packages off the package queue until dequeue() yields a false
+# value, parses and verifies each database entry, and adds the number of
+# problems found to the shared error counter.
+sub _worker_package_queue {
+       my $self = shift;
+       while (my $workpack = $self->{package_queue}->dequeue()) {
+               # A die (e.g. from a malformed entry) must not take the whole worker
+               # down: report it, count it as an error and carry on with the queue.
+               my $errors = eval {
+                       my $dbdata = $self->_parse_db_entry($workpack->{db_desc_content});
+                       $self->_verify_db_entry($workpack->{dirname}, $dbdata);
+               };
+               unless (defined $errors) {
+                       my $reason = $@;
+                       chomp $reason;
+                       $self->_output(sprintf("Failed to verify package %s: %s\n", $workpack->{package}, $reason));
+                       $errors = 1;
+               }
+
+               # "+=" on a shared value is a read-modify-write; serialize it so that
+               # concurrent workers cannot lose updates. The lock is released at the
+               # end of this loop iteration.
+               lock(%{$self});
+               $self->{errors} += $errors;
+       }
+}
+
+# Body of the output thread: print every message taken off the output queue
+# to STDERR until dequeue() yields a false value.
+sub _worker_output_queue {
+       my ($self) = @_;
+       my $queue = $self->{output_queue};
+       while (my $message = $queue->dequeue()) {
+               print STDERR $message;
+       }
+}
+
+# Shut the verifier down in two stages: first end the package queue and join
+# the package workers, then end the output queue and join whatever threads
+# are still listed (so messages produced by the workers are queued before
+# the output queue is ended).
+sub finalize {
+       my ($self) = @_;
+
+       my $package_queue = $self->{package_queue};
+       $package_queue->end();
+       $self->_join_threads($self->{workers});
+
+       my $output_queue = $self->{output_queue};
+       $output_queue->end();
+       my @remaining = threads->list;
+       $self->_join_threads(\@remaining);
+}
+
+# Join every thread in the given array reference, skipping entries with a
+# thread id of 0 and the calling thread itself. With the debug option set, a
+# note is printed before each join.
+sub _join_threads {
+       my ($self, $threads) = @_;
+
+       for my $thread (@{$threads}) {
+               next unless $thread->tid;
+               next if threads::equal($thread, threads->self);
+
+               print "waiting for thread ".$thread->tid()." to finish\n" if $self->{opts}->{debug};
+               $thread->join;
+       }
+}
+
+# Return a true value if the error counter is above zero, a false value
+# otherwise.
+sub get_error_status {
+       my ($self) = @_;
+
+       my $failed = $self->{errors} > 0;
+       return $failed;
+}
+
+# Queue every package described in a repo database for verification.
+#
+# $repodb is the path of the database archive. Each "<name>/desc" member is
+# put on the package queue together with the directory of the database,
+# which is where the package files are looked up later. A database that
+# cannot be read is reported and counted as an error.
+sub check_repodb {
+       my $self = shift;
+       my $repodb = shift;
+
+       my $db = Archive::Tar->new();
+       # In scalar context read() gives the number of members read, which may
+       # legitimately be 0 for an empty database; only undef signals a failure.
+       my $read_count = $db->read($repodb);
+       unless (defined $read_count) {
+               $self->_output(sprintf("Failed to read database '%s': %s\n", $repodb, $db->error() || 'unknown error'));
+               # Worker threads update the same counter, so take the lock first.
+               lock(%{$self});
+               $self->{errors}++;
+               return;
+       }
+
+       my $dirname = dirname($repodb);
+       my $pkgcount = 0;
+
+       for my $file_object ($db->get_files()) {
+               if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
+                       my $package = $1;
+                       $self->{package_queue}->enqueue({
+                                       package => $package,
+                                       db_desc_content => $file_object->get_content(),
+                                       dirname => $dirname,
+                               });
+                       $pkgcount++;
+               }
+       }
+
+       $self->_debug(sprintf("Queued %d package from database '%s'\n", $pkgcount, $repodb));
+}
+
+# Parse the text of a database "desc" entry.
+#
+# The content is a sequence of sections: a "%KEY%" header line followed by
+# one value per line, terminated by an empty line. Returns a hash reference
+# mapping each key to an array reference of its values. Dies if a value line
+# appears while no section is open.
+sub _parse_db_entry {
+       my $self = shift;
+       my $content = shift;
+       my %db;
+       my $key;
+
+       for my $line (split /\n/, $content) {
+               if ($line eq '') {
+                       $key = undef;
+                       next;
+               }
+               if ($line =~ m/^%(.+)%$/) {
+                       $key = $1;
+               } else {
+                       # Check before using the key, and test definedness rather than
+                       # truth so that a section named "0" is still accepted.
+                       die "\$key not set. Is the db formated incorrectly?" unless defined $key;
+                       push @{$db{$key}}, $line;
+               }
+       }
+       return \%db;
+}
+
+# Put a message on the output queue, prefixed with the id of the calling
+# thread. Empty messages are dropped.
+sub _output {
+       my ($self, $output) = @_;
+
+       return if $output eq "";
+
+       my $tid = threads->self->tid();
+       $self->{output_queue}->enqueue(sprintf("Thread %s: %s", $tid, $output));
+}
+
+# Emit a message through _output(), but only when the debug option is set.
+sub _debug {
+       my ($self, $output) = @_;
+
+       if ($self->{opts}->{debug}) {
+               $self->_output($output);
+       }
+}
+
+# Verify one parsed database entry against the package file on disk.
+#
+# $basedir is the directory holding the package files, $dbdata the hash
+# reference produced by _parse_db_entry(). Checks that the file exists, that
+# its size matches and, with the checksum option, that its checksums match.
+# Returns the number of problems found (0 when everything matches).
+sub _verify_db_entry {
+       my $self = shift;
+       my $basedir = shift;
+       my $dbdata = shift;
+       my $ret = 0;
+
+       # Without a file name there is nothing to look up; report it rather than
+       # building a path from an undefined value.
+       my $filename = $dbdata->{FILENAME}[0];
+       unless (defined $filename) {
+               $self->_output("Database entry has no FILENAME field\n");
+               return 1;
+       }
+
+       # verify package exists
+       my $pkgfile = $basedir.'/'.$filename;
+       $self->_debug(sprintf("Checking package %s\n", $filename));
+       unless (-e $pkgfile) {
+               $self->_output(sprintf("Package file missing: %s\n", $pkgfile));
+               return 1;
+       }
+
+       $ret += $self->_verify_package_size($dbdata, $pkgfile);
+       $ret += $self->_verify_package_checksum($dbdata, $pkgfile) if $self->{opts}->{checksum};
+       # TODO verify gpg sigs?
+
+       return $ret;
+}
+
+# Compare the CSIZE value of a database entry with the size of the package
+# file on disk. Returns 0 when they are equal; otherwise reports the
+# mismatch and returns 1.
+sub _verify_package_size {
+       my ($self, $dbdata, $pkgfile) = @_;
+
+       my $expected = $dbdata->{CSIZE}[0];
+       my @stat = stat($pkgfile);
+       my $actual = $stat[7];    # element 7 of stat() is the size in bytes
+
+       return 0 if $expected == $actual;
+
+       $self->_output(sprintf("Package file has incorrect size: %d vs %d: %s\n", $expected, $actual, $pkgfile));
+       return 1;
+}
+
+# Compare the MD5SUM and SHA256SUM values of a database entry with the
+# checksums of the package file on disk.
+#
+# Returns 0 when both checksums match. Returns 1 after reporting the problem
+# when the file cannot be opened or read, or when either checksum differs.
+sub _verify_package_checksum {
+       my $self = shift;
+       my $dbdata = shift;
+       my $pkgfile = shift;
+
+       my $md5 = Digest::MD5->new;
+       my $sha = Digest::SHA->new(256);
+
+       my $fh;
+       unless (open $fh, "<", $pkgfile) {
+               $self->_output(sprintf("Failed to open package file %s: %s\n", $pkgfile, $!));
+               return 1;
+       }
+       # Packages are binary data; make sure no I/O layer translates the bytes.
+       binmode $fh;
+
+       my $content;
+       # 128MiB to keep random IO low when using multiple threads (only works for large packages though)
+       my $chunksize = 1024*1024*128;
+       my $bytes;
+       while ($bytes = read($fh, $content, $chunksize)) {
+               $md5->add($content);
+               $sha->add($content);
+       }
+       # read() yields 0 at end of file but undef on error; do not mistake a
+       # failed read for a complete file.
+       unless (defined $bytes) {
+               $self->_output(sprintf("Failed to read package file %s: %s\n", $pkgfile, $!));
+               close $fh;
+               return 1;
+       }
+       close $fh;
+
+       my $expected_sha = $dbdata->{SHA256SUM}[0];
+       my $expected_md5 = $dbdata->{MD5SUM}[0];
+       my $got_md5 = $md5->hexdigest;
+       my $got_sha = $sha->hexdigest;
+
+       unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
+               my $output;
+               $output .= sprintf "Package file has incorrect checksum: %s\n", $pkgfile;
+               $output .= sprintf "expected: SHA %s\n", $expected_sha;
+               $output .= sprintf "got:      SHA %s\n", $got_sha;
+               $output .= sprintf "expected: MD5 %s\n", $expected_md5;
+               $output .= sprintf "got:      MD5 %s\n", $got_md5;
+               $self->_output($output);
+               return 1;
+       }
+       return 0;
+}
+
-- 
2.9.0

Reply via email to