Bug#677250: Acknowledgement (xz files with multiple blocks are not recognized)

Vincent Ladeuil Tue, 19 Jun 2012 01:03:20 -0700

>>>>> Joey Hess <[email protected]> writes:

    >> +        if (scalar(@filters) != 1 || $filters[0] !~ /^--lzma2=/) {
    >> +            die "Only LZMA2 is supported" ;


    >> +            die "Unkown dict size: $dict_size\n" if (!defined($presets)) ;

    >> +    my $check_kwd = $check_kwd_of{$check_name} ;
    >> +    die "Unknown xz check: $check_name\n" if (!defined($check_kwd)) ;

    > Could it just fall back to the old guessing behavior instead of dying?

It does, in reproducexz:

        eval {
            $possible_args = readxz($orig);
        } ;
        # If we get an error we fallback to guessing, otherwise, we should
        # succeed with one of the proposed combinations
        if (! $@) {

    >> +            foreach my $program (@supported_xz_programs) {
    >> +                # try to guess the xz arguments that are needed
    >>                  foreach my $args (predictxzargs($possible_levels, $program)) {
    >> -                        testvariant($orig, $tmpin, $program, @$args)
    >> -                                && return $program, @$args;
    >> +                    testvariant($orig, $tmpin, $program, @$args)
    >> +                        && return $program, @$args;

    > Your editor is replacing tabs with spaces..

I can fix that.

   modified      pristine-xz
                                                                        

=== modified file 'pristine-xz'
--- pristine-xz	2012-06-12 15:45:07 +0000
+++ pristine-xz	2012-06-19 07:47:17 +0000
@@ -101,6 +101,133 @@
 	print STDERR "       pristine-xz [-vdkt] genxz delta file\n";
 }
 
+sub assign_fields {
+	my ($hash, $labels, $fields) = @_ ;
+	@$hash{@$labels} = @$fields[1..scalar(@$labels)] ;
+}
+
+sub scan_xz_lvv_robot {
+	my ($filename) = @_ ;
+	# We need at least version 5.0 to get a proper '-lvv --robot'
+	# implemented
+	my $cmd = "xz -lvv --robot $filename" ;
+	my $ret = open (my $in, "$cmd |") || die "$cmd failed: $!";
+	my %xz = (file => {}, stream => {}, blocks => [],
+		  summary => {}, totals => {}) ;
+	my (%file, %stream, @blocks, %summary, %totals) ;
+	my @file_labels = qw{nb_streams nb_blocks compressed uncompressed
+			     ratio checks padding_size} ;
+	my @stream_labels =
+		qw{stream_num nb_blocks compressed_offset uncompressed_offset
+		   compressed_size uncompressed_size ratio check_name
+		   padding_size};
+	my @block_labels = 
+		qw{stream_num block_in_stream block_in_file compressed_offset
+		   uncompressed_offset compressed_size uncompressed_size ratio
+		   check_name check_value header_size size_present_flags
+		   actual_compressed_size uncompress_memory filter_chain} ;
+	my @summary_labels = qw{uncompressed_memory size_in_blocks} ;
+	my @totals_labels =
+		qw{nb_streams nb_blocks compressed_size uncompressed_size ratio
+		   check_names padding_size nb_files uncompressed_memory
+		   size_in_blocks} ;
+
+	while (my $line = <$in>) {
+		chomp $line ;
+		my @fields = split(/\t/, $line) ;
+		if ($fields[0] eq 'name') {
+			next ;
+		}
+		if ($fields[0] eq 'file') {
+			assign_fields($xz{file}, \@file_labels, \@fields) ;
+			next ;
+		}
+		if ($fields[0] eq 'stream') {
+			assign_fields($xz{stream}, \@stream_labels, \@fields) ;
+			next ;
+		}
+		if ($fields[0] eq 'block') {
+			my %block ;
+			assign_fields(\%block, \@block_labels, \@fields) ;
+			push @{$xz{blocks}}, \%block ;
+			next ;
+		}
+		if ($fields[0] eq 'summary') {
+			assign_fields($xz{summary}, \@summary_labels, \@fields);
+			next ;
+		}
+		if ($fields[0] eq 'totals') {
+			assign_fields($xz{totals}, \@totals_labels, \@fields) ;
+			next ;
+		}
+	}
+	close $in ;
+	return \%xz ;
+}
+
+sub predict_xz_args {
+	my ($xz) = @_ ;
+	my $presets = undef ;
+	my $block_split = undef ;
+	my $blocks = $xz->{blocks} ;
+	if (scalar(@$blocks)) {
+		# There is at least one block. We assume the same compression
+		# level for all blocks
+		my $block = $blocks->[0] ;
+		my @filters = split(/,/, $block->{filter_chain}) ;
+		if (scalar(@filters) != 1 || $filters[0] !~ /^--lzma2=/) {
+			die "Only LZMA2 is supported" ;
+		}
+		# Deduce the presets from the dict size
+		if ($filters[0] =~ /--lzma2=dict=(.*)/) {
+			my $dict_size = $1 ;
+			my %lzma2_presets_from_dict_size_of =
+				('256KiB' => ['0'],
+				 '1Mib'   => ['1'],
+				 '2MiB'   => ['2'],
+				 '4MiB'   => ['4', '3'],
+				 # Put 6 before 5 as it's the default and is
+				 # more likely to be right
+				 '8MiB'   => ['6', '5'],
+				 '16MiB'  => ['7'],
+				 '32MiB'  => ['8'],
+				 '64MiB'  => ['9'],
+				) ;
+			$presets = $lzma2_presets_from_dict_size_of{$dict_size};
+			die "Unkown dict size: $dict_size\n"
+				if (!defined($presets)) ;
+		}
+		if (scalar(@$blocks) > 1) {
+			# Gather the block uncompressed sizes
+			$block_split = join(',', map {$_->{uncompressed_size}}
+					    @$blocks) ;
+		}
+	}
+	# FIXME: none is missing
+	my %check_kwd_of = 
+		(CRC32 => 'crc32',
+		 CRC64 => 'crc64',
+		 'SHA-256' => 'sha256',
+		) ;
+	my $check_name = $xz->{stream}->{check_name} ;
+	my $check_kwd = $check_kwd_of{$check_name} ;
+	die "Unknown xz check: $check_name\n" if (!defined($check_kwd)) ;
+
+	my $possible_args = [] ;
+	my $common = ["--check=$check_kwd", "-z"] ;
+	# FIXME: --block-split is not (yet) part of xz-utils upstream
+	if (0 && defined($block_split)) {
+		# We put the block list in front of the parameters to make it
+		# easier to filter it later.
+		unshift @$common, "--block-split=$block_split" ;
+	}
+	foreach my $preset (@$presets) {
+		push @$possible_args, [@$common, "-$preset"] ;
+		push @$possible_args, [@$common, "-${preset}e"] ;
+	}
+	return $possible_args ;
+}
+
 sub readxz {
 	my $filename = shift;
 
@@ -108,6 +235,22 @@
 		error "This is not a valid xz archive.";
 	}
 
+	# This will guess the compression level, check and blocks from the file.
+	# More info is still needed if the level used was 3/4 or 5/6 (see
+	# lzma2_presets_from_dict_size_of in predict_xz_args) or if --extreme
+	# was used. We output possible args for each combination in this case.
+	my $xz = scan_xz_lvv_robot($filename) ;
+	my $possible_args = predict_xz_args($xz) ;
+	return $possible_args ;
+}
+
+sub predictxzlevels {
+	my $filename = shift;
+
+	if (! is_xz($filename)) {
+		error "This is not a valid xz archive.";
+	}
+
 	# XXX We don't currently have a way to guess the level from the
 	# file format, as this level only presets several other tunables.
 	# Correct handling would involve finding as many preset values as
@@ -155,21 +298,37 @@
 	my $orig=shift;
 
 	my $wd=tempdir();
-	
+
 	my $tmpin="$wd/test";
 	doit_redir($orig, $tmpin, "xz", "-dc");
 
 	# read fields from xz headers
-	my ($possible_levels) = readxz($orig);
+	my $possible_args ;
+        eval {
+		$possible_args = readxz($orig);
+        } ;
+        # If we get an error we fallback to guessing, otherwise, we should
+        # succeed with one of the proposed combinations
+        if (! $@) {
+		foreach my $program (@supported_xz_programs) {
+			foreach my $args (@$possible_args) {
+				testvariant($orig, $tmpin, $program, @$args)
+					&& return $program, @$args;
+			}
+		}
+        } else {
+		# Fallback to guessing
+		my ($possible_levels) = predictxzlevels($orig);
 
-	foreach my $program (@supported_xz_programs) {
-		# try to guess the xz arguments that are needed by the
-		# header information
-		foreach my $args (predictxzargs($possible_levels, $program)) {
-			testvariant($orig, $tmpin, $program, @$args)
-				&& return $program, @$args;
+		foreach my $program (@supported_xz_programs) {
+			# try to guess the xz arguments that are needed
+			foreach my $args (predictxzargs($possible_levels,
+							$program)) {
+				testvariant($orig, $tmpin, $program, @$args)
+					&& return $program, @$args;
+			}
 		}
-	}
+        }
 
 	print STDERR "pristine-xz failed to reproduce build of $orig\n";
 	print STDERR "(Please file a bug report.)\n";

Bug#677250: Acknowledgement (xz files with multiple blocks are not recognized)

Reply via email to