Package: pristine-tar Version: 1.24 examples:
Many (> 100) kde 4.8.2 packages including: https://launchpad.net/ubuntu/precise/+source/marble/4:4.8.2-0ubuntu1/+files/marble_4.8.2.orig.tar.xz https://launchpad.net/ubuntu/precise/+source/kdesdk/4:4.8.2-0ubuntu1/+files/kdesdk_4.8.2.orig.tar.xz https://launchpad.net/ubuntu/precise/+source/kde-workspace/4:4.8.2-0ubuntu1/+files/kde-workspace_4.8.2.orig.tar.xz The same multiple blocks files can be found at ftp://ftp.kde.org/pub/kde/stable/4.8.4/src/ (examples includes kdesk, kalzium, oxygen-icons, etc). The following script is a first shot at parsing the .xz files and extracting the relevant parameters (compression level, check and blocks). It may be used to enhance the xz file recognition but doesn't provide a solution to rebuild multiple blocks xz files. #!/usr/bin/perl # Dotted numbers in comments refer to http://tukaani.org/xz/xz-file-format.txt use warnings; use strict; use constant DEBUG => 1 ; use Fcntl qw{:seek} ; use Data::Dumper ; sub checked_read { # $buf should be a reference so we can still use 'read' and its magic # trick my ($in, $buf, $nb_to_read, $msg) = @_ ; my $nb_read = read($in, $$buf, $nb_to_read) ; die $msg if ($nb_read != $nb_to_read) ; } # see 1.2 with slight adaptation since we read from a file handle instead of # using a char buffer sub read_multibyte_integer { my ($in, $size_max) = @_ ; return undef, 0 if ($size_max == 0) ; if ($size_max > 9) { $size_max = 9 ; } my $buf ; my $i = 1 ; checked_read($in, \$buf, 1, "Corrupted xz file\n") ; $buf = unpack('C', $buf) ; warn sprintf("buf: %d / %d", $buf, $buf & 0x7f) if (DEBUG >= 9) ; my $num = $buf & 0x7f ; while ($buf & 0x80) { return (undef, 0) if ($i >= $size_max) ; checked_read($in, \$buf, 1, "Corrupted xz file\n") ; $buf = unpack('C', $buf) ; warn sprintf("buf: %d / %d", $buf, $buf & 0x7f) if (DEBUG >= 9) ; $i++ ; return (undef, 0) if ($buf == 0x00) ; $num |= ($buf & 0x7f) << (($i - 1) * 7) ; } return ($num, $i) ; } sub decode_check_byte { my ($byte) = @_ ; my $check_key ; # We use the xz --check values as values below, the Ids are used as keys. my %known_check_of = ( 0x00 => 'none', 0x01 => 'crc32', 0x04 => 'crc64', 0x0A => 'sha256', ) ; $check_key = $known_check_of{$byte} ; if (!defined($check_key)) { die sprintf("[%02X] is an unknown xz check\n", $byte) ; } return $check_key ; } sub decode_stream_flags { my @flags = @_ ; die "Unknown flags used in first format byte\n" if ($flags[0] != 0) ; my $check = decode_check_byte($flags[1]) ; return [$check] ; } sub parse_stream_header { my ($in) = @_ ; my ($buf) ; checked_read($in, \$buf, 6, "Invalid xz file\n") ; my $magic = unpack('H12', $buf) ; die "Not an xz file\n" if ($magic ne 'fd377a585a00') ; checked_read($in, \$buf, 2, "Corrupted xz file\n") ; my @flags = unpack('CC', $buf) ; my $flags = decode_stream_flags(@flags) ; my $check = $flags->[0] ; checked_read($in, \$buf, 4, "Corrupted xz file\n") ; my $CRC32 = unpack('V', $buf) ; return [$check, $CRC32] ; } my %lzma2_preset_from_dict_size_of = ( 0x00040000 => ['0'], # 256 KiB 0x00100000 => ['1'], # 1 MiB 0x00200000 => ['2'], # 2 MiB 0x00400000 => ['3', '4'], # 4 MiB 0x00800000 => ['5', '6'], # 8 MiB 0x01000000 => ['7'], # 16 MiB 0x02000000 => ['8'], # 32 MiB 0x04000000 => ['9'], # 64 MiB ) ; sub decode_lzma2_props { my ($props) = @_ ; my $bits = unpack('C', $props) & 0x3f ; # see 5.3.1 my $dict_size ; if ($bits == 40) { $dict_size = 0xFFFFFFFF ; } else { $dict_size = 2 | ($bits & 1) ; $dict_size <<= $bits / 2 + 11 ; } my $presets = $lzma2_preset_from_dict_size_of{$dict_size} ; die "Unkown dict size: $dict_size\n" if (!defined($presets)) ; return {id => 'lzma2', presets => $presets} ; } my %props_decoder_of = ( 0x21 => \&decode_lzma2_props, ) ; sub decode_filter_props { my ($id, $props) = @_ ; my $decoder = $props_decoder_of{$id} ; die "$id is not a known filter\n" if (!defined($decoder)) ; return &$decoder($props) ; } sub parse_block_header { my ($in, $check) = @_ ; my ($buf) ; checked_read($in, \$buf, 1, "Corrupted xz file\n") ; my $size = unpack('C', $buf) ; $size = ($size + 1) * 4 ; # see 3.1.1 checked_read($in, \$buf, 1, "Corrupted xz file\n") ; my $flags = unpack('C', $buf) ; my $nb_filters = ($flags & 0x03) + 1 ; # 2 bits used to encode 1-4 values my $must_be_zero = $flags & 0x3C ; die "Reversed block flags non-zero\n" if ($must_be_zero != 0) ; my $compressed_size_present = $flags & 0x40 ; die "Not implemented\n" if ($compressed_size_present != 0) ; my $uncompressed_size_present = $flags & 0x80 ; die "Not implemented\n" if ($uncompressed_size_present != 0) ; warn "flags: ".Dumper([$nb_filters, $must_be_zero, $compressed_size_present, $uncompressed_size_present])."\n" if (DEBUG >= 2) ; my $filters = [] ; my $remaining_filters = $nb_filters ; while ($remaining_filters) { my $filter ; my ($lg, $id, $prop_size) ; ($id, $lg) = read_multibyte_integer($in, 9) ; warn "Id: $id, lg: $lg\n" if (DEBUG >= 2) ; ($prop_size, $lg) = read_multibyte_integer($in, 9) ; warn "Size: $prop_size, lg: $lg\n" if (DEBUG >= 2) ; my $props ; checked_read($in, \$props, $prop_size, "Corrupted xz file\n") ; warn "Props: $props\n" if (DEBUG >= 2) ; my $decoded_props = decode_filter_props($id, $props) ; push @$filters, [$id, $decoded_props] ; $remaining_filters-- ; } return [$size, $filters] ; } sub parse_indices { my ($in, $backward_size) = @_ ; seek($in, - $backward_size, SEEK_CUR) or die "Cannot seek to index - $!\n"; my ($buf) ; checked_read($in, \$buf, 1, "Corrupted xz file\n") ; my $indicator = unpack('C', $buf) ; die "No index indicator ($buf)\n" if ($indicator != 0x00) ; my ($lg, $nb_records) ; ($nb_records, $lg) = read_multibyte_integer($in, 9) ; warn "nb records: $nb_records\n" if (DEBUG >= 3) ; my $total_lg = $lg ; my $records = [] ; my $remaining_records = $nb_records ; while ($remaining_records) { my $record ; my ($unpadded, $uncompressed) ; ($unpadded, $lg) = read_multibyte_integer($in, 9) ; warn "unpadded: $unpadded ($lg)\n" if (DEBUG >= 4) ; $total_lg += $lg ; ($uncompressed, $lg) = read_multibyte_integer($in, 9) ; warn "uncompressed: $uncompressed: ($lg)\n" if (DEBUG >= 4) ; $total_lg += $lg ; push @$records, [$unpadded, $uncompressed] ; $remaining_records-- ; } checked_read($in, \$buf, 4, "Corrupted xz file\n") ; my $CRC32 = unpack('V', $buf) ; return [$CRC32, $records] ; } sub parse_stream_footer { my ($in) = @_ ; seek($in, 0, SEEK_END) or die "Cannot seek to footer- $!\n"; my $file_size = tell($in) ; if ($file_size == -1 || $file_size % 4 != 0) { die "Corrupted or invalid xz file\n" } # CRC32: 4, backward size: 4, stream flags: 2, magic bytes: 2 my $footer_size = 4 + 4 + 2 + 2 ; seek($in, - $footer_size, SEEK_END) ; my $buf ; checked_read($in, \$buf, $footer_size, "Corrupted xz file\n") ; my ($CRC32, $backward_size, $flag1, $flag2, $magic) = unpack('VVC2H4', $buf) ; $backward_size = ($backward_size + 1) * 4 ; # see 2.1.2.2 my $flags = decode_stream_flags($flag1, $flag2) ; my $check = $flags->[0] ; die "Not an xz file\n" if ($magic ne '595a') ; # aka 'YZ' see 2.1.2.4 warn "CRC32_f: $CRC32, backward size: $backward_size, check: $check\n" if (DEBUG >= 2) ; # seek back to the beginning of the footer seek($in, - $footer_size, SEEK_END) ; return [$CRC32, $backward_size, $check] ; } sub parse_xz { my ($fname) = (shift) ; open (my $in, "<", $fname) || die "$fname: $!"; warn "Parsing $fname" if (DEBUG >= 1) ; my ($fhdr, $shdr, $bhdr, $sftr, $index); $fhdr = parse_stream_header($in) ; my ($check, $CRC32_h) = @$fhdr ; warn sprintf("check: %s, CRC32_h: %X\n", $check, $CRC32_h) if (DEBUG >= 2) ; $bhdr = parse_block_header($in, $check) ; warn "bhdr:".Dumper($bhdr)."\n" if (DEBUG >= 2) ; $sftr = parse_stream_footer($in, $check) ; warn "sftr:".Dumper($sftr)."\n" if (DEBUG >= 2) ; my ($CRC32_f, $backward_size, $check_f) = @$sftr ; warn sprintf("check: %s, CRC32_f: %X\n", $check, $CRC32_f) if (DEBUG >= 2) ; $index = parse_indices($in, $backward_size) ; my ($CRC32_i, $indices) = @$index ; warn sprintf("CRC32_i: %08X\n", $CRC32_i) if (DEBUG >= 2) ; warn "indices:".Dumper($indices)."\n" if (DEBUG >= 2) ; my $filters = $bhdr->[1] ; warn "filters:".Dumper($filters)."\n" if (DEBUG >= 2) ; die "Can't decode more than one filter\n" if ($#$filters > 0) ; my $filter = $filters->[0]->[1] ; warn "filter:".Dumper($filter)."\n" if (DEBUG >= 2) ; printf("Id: %s, check: %s, presets: %s\n", $filter->{id}, $check, join('|', @{$filter->{presets}})) ; my @input_block_sizes = map {$_->[1]} @$indices ; printf("Block(s): %s\n", join(", ", @input_block_sizes)) ; close $in; } sub main { if ($#ARGV != 0) { die "A single file name is expected\n" ; } parse_xz($ARGV[0]) ; } main() ; -- To UNSUBSCRIBE, email to [email protected] with a subject of "unsubscribe". Trouble? Contact [email protected]

