Niko Tyni wrote: > I see #895175 was a similar issue and resulted in a rather complicated > regexp in Lintian::Collect::Package for parsing tar output (commit > a75f3edcb099bd4b8794e2ecb7fd19e129e77f03). I expect something like that > should work here as well. Sorry about the lack of a patch.
No worries. This is actually a bit more complicated that I thought. Unfinished patch follows (note the FIXME): --- a/collection/src-orig-index +++ b/collection/src-orig-index @@ -38,7 +38,7 @@ use lib "$ENV{'LINTIAN_ROOT'}/lib"; use Lintian::Collect::Dispatcher qw(create_info); use Lintian::Processable::Source; -use Lintian::Util qw(internal_error sort_file_index gzip); +use Lintian::Util qw(internal_error sort_file_index gzip $TARTVF_REGEX); use constant EMPTY => q{}; use constant SPACE => q{ }; @@ -183,7 +183,7 @@ sub index_orig { # prefix. my $prefix; for my $line (@index) { - my ($filename) = ($line =~ /^(?:\S+\s+){5}(.*)/); + my $filename = ($line =~ $TARTVF_REGEX)[5]; $filename =~ s,^\./+,,o; my ($dirname) = ($filename =~ m,^([^/]+),); if ( defined $dirname @@ -212,11 +212,14 @@ sub index_orig { # then strip the prefix and add $compname (if any) if ($prefix) { @index = map { - if (m,^((?:\S+\s+){5})(?:\./)?\Q$prefix\E(?:/+(.*+)?|\Z),){ - my ($data, $file) = ($1, $2); + my @line = $_ =~ $TARTVF_REGEX; + if (@line) { + my $file = $line[5]; + $file =~ s/^(?:\.\/)?\Q$prefix\E\/+//; if ($file && $file !~ m,^(?:/++)?\Z,o){ $file = "$compname/$file" if $compname; - "$data$file\n"; + $line[5] = $file; + join(' ', @line) . "\n"; } else { (); } @@ -228,6 +231,7 @@ sub index_orig { # Prefix with the compname (because that is where they will be # unpacked to. @index = map { + # FIXME: Use $TARTVF_REGEX s{^((?:\S++\s++){5})(?:\./)?\Q$prefix\E(?:/+)?} {$1$compname/}r } @index; diff --git a/lib/Lintian/Info/Package.pm b/lib/Lintian/Info/Package.pm index 91011d8c7..ebf717875 100644 --- a/lib/Lintian/Info/Package.pm +++ b/lib/Lintian/Info/Package.pm @@ -30,7 +30,7 @@ use Scalar::Util qw(blessed); use Lintian::Path; use Lintian::Path::FSInfo; use Lintian::Util - qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name); + qw(internal_error open_gz perm2oct normalize_pkg_path dequote_name $TARTVF_REGEX); use Moo::Role; use namespace::clean; @@ -286,14 +286,7 @@ sub _fetch_index_data { my (%file, $perm, $operm, $ownership, $name, $raw_type, $size); my ($date, $time); - # Parse line from output of "tar -tvf" allowing for spaces within the - # ownership field whilst still allowing spaces in filenames. (#895175) - # - # Note this cannot ever be 100% reliable as the filename might contain - # "fake" dates. - ($perm,$ownership,$size,$date,$time,$name) - = $line - =~ /^(.{10}) (.*?) (\d+) ([-\d]{10}) (?:([:\d]{5,8}(?:.\d+)?)[ ]+)?(.*)$/; + ($perm,$ownership,$size,$date,$time,$name) = $line =~ $TARTVF_REGEX; croak "cannot parse tar output from $index: \"$line\"" unless defined $perm; $ownership =~ s/\s+$//; diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm index 2487f5af7..84cab8135 100644 --- a/lib/Lintian/Util.pm +++ b/lib/Lintian/Util.pm @@ -85,6 +85,7 @@ BEGIN { $PKGNAME_REGEX $PKGREPACK_REGEX $PKGVERSION_REGEX + $TARTVF_REGEX )); } @@ -165,6 +166,19 @@ our $PKGVERSION_REGEX = qr/ (?: - [0-9A-Za-z.+:~]+ )* # Optional debian revision (+ upstreams versions with hyphens) /xoa; § Regards, -- ,''`. : :' : Chris Lamb `. `'` la...@debian.org 🍥 chris-lamb.co.uk `-