Package: pristine-tar
Version: 1.24

examples:

Many (> 100) kde 4.8.2 packages including:

https://launchpad.net/ubuntu/precise/+source/marble/4:4.8.2-0ubuntu1/+files/marble_4.8.2.orig.tar.xz

https://launchpad.net/ubuntu/precise/+source/kdesdk/4:4.8.2-0ubuntu1/+files/kdesdk_4.8.2.orig.tar.xz

https://launchpad.net/ubuntu/precise/+source/kde-workspace/4:4.8.2-0ubuntu1/+files/kde-workspace_4.8.2.orig.tar.xz


The same kind of multi-block files can be found at
ftp://ftp.kde.org/pub/kde/stable/4.8.4/src/ (examples include kdesdk,
kalzium, oxygen-icons, etc.).

The following script is a first shot at parsing the .xz files and
extracting the relevant parameters (compression level, check and
blocks).

It may be used to improve xz file recognition, but it doesn't provide a
way to rebuild multi-block xz files.

#!/usr/bin/perl

# Dotted numbers in comments refer to http://tukaani.org/xz/xz-file-format.txt

use warnings;
use strict;
use constant DEBUG => 1 ;

use Fcntl qw{:seek} ;

use Data::Dumper ;

# Read exactly $nb_to_read bytes from $in, dying with $msg otherwise.
#
#   $in         - file handle to read from
#   $buf        - reference to the destination scalar; a reference is used so
#                 that read() can still fill the caller's variable in place
#   $nb_to_read - number of bytes that must be obtained
#   $msg        - message given to die on a short read or a read error
#
# Returns nothing useful; the data is delivered through $$buf.
sub checked_read {
    my ($in, $buf, $nb_to_read, $msg) = @_ ;
    my $nb_read = read($in, $$buf, $nb_to_read) ;
    # read() returns undef on error: test that first so the numeric
    # comparison never sees an undefined value (and never warns about it).
    die $msg if (!defined($nb_read) || $nb_read != $nb_to_read) ;
    return ;
}

# Decode one variable-length integer (see 1.2), reading it byte by byte from
# the file handle $in instead of from a char buffer.
#
# Each byte carries 7 payload bits, least significant group first; the high
# bit tells whether another byte follows.  At most $size_max bytes (capped to
# 9) are consumed.
#
# Returns ($value, $nb_bytes_read) on success and (undef, 0) when $size_max
# is 0, when the encoding is longer than allowed, or when a continuation byte
# is 0x00.
sub read_multibyte_integer {
    my ($in, $size_max) = @_ ;
    return (undef, 0) if ($size_max == 0) ;
    $size_max = 9 if ($size_max > 9) ;

    my $raw ;
    checked_read($in, \$raw, 1, "Corrupted xz file\n") ;
    my $byte = unpack('C', $raw) ;
    warn sprintf("buf: %d / %d", $byte, $byte & 0x7f) if (DEBUG >= 9) ;

    my $value = $byte & 0x7f ;
    # Number of bytes consumed so far; also the index of the next 7-bit group.
    my $count = 1 ;

    while ($byte & 0x80) {
        return (undef, 0) if ($count >= $size_max) ;

        checked_read($in, \$raw, 1, "Corrupted xz file\n") ;
        $byte = unpack('C', $raw) ;
        warn sprintf("buf: %d / %d", $byte, $byte & 0x7f) if (DEBUG >= 9) ;
        # A zero continuation byte is rejected.
        return (undef, 0) if ($byte == 0x00) ;

        $value |= ($byte & 0x7f) << ($count * 7) ;
        $count++ ;
    }
    return ($value, $count) ;
}

# Translate a check ID into the matching `xz --check` value.
# Dies when the ID is not one of the known ones.
sub decode_check_byte {
    my ($byte) = @_ ;
    # Keys are the check IDs, values are the names accepted by xz --check.
    my %check_name_for = (
        0x00 => 'none',
        0x01 => 'crc32',
        0x04 => 'crc64',
        0x0A => 'sha256',
    ) ;
    if (exists($check_name_for{$byte})) {
        return $check_name_for{$byte} ;
    }
    die sprintf("[%02X] is an unknown xz check\n", $byte) ;
}

# Decode the two stream flag bytes.
#
# The first byte must be zero; the second one is handed to
# decode_check_byte().  Returns an array reference holding the check name as
# its only element.
sub decode_stream_flags {
    my ($first_byte, $check_id) = @_ ;
    if ($first_byte != 0) {
        die "Unknown flags used in first format byte\n" ;
    }
    return [ scalar(decode_check_byte($check_id)) ] ;
}

# Parse the 12 bytes of the stream header from the current position of $in:
# 6 magic bytes, 2 stream flag bytes and a little-endian CRC32.
#
# Returns [$check, $CRC32] where $check is the decoded check name.  The CRC32
# is returned as read; it is not verified here.
sub parse_stream_header {
    my ($in) = @_ ;
    my $raw ;

    checked_read($in, \$raw, 6, "Invalid xz file\n") ;
    if (unpack('H12', $raw) ne 'fd377a585a00') {
        die "Not an xz file\n" ;
    }

    checked_read($in, \$raw, 2, "Corrupted xz file\n") ;
    my ($check) = @{ decode_stream_flags(unpack('CC', $raw)) } ;

    checked_read($in, \$raw, 4, "Corrupted xz file\n") ;
    return [$check, unpack('V', $raw)] ;
}



# Dictionary size in bytes => list of the preset levels associated with that
# size.  Several presets may share the same dictionary size.
my %lzma2_preset_from_dict_size_of =
    (
     0x00040000 => ['0'],      # 256 KiB
     0x00100000 => ['1'],      #   1 MiB
     0x00200000 => ['2'],      #   2 MiB
     0x00400000 => ['3', '4'], #   4 MiB
     0x00800000 => ['5', '6'], #   8 MiB
     0x01000000 => ['7'],      #  16 MiB
     0x02000000 => ['8'],      #  32 MiB
     0x04000000 => ['9'],      #  64 MiB
    ) ;

# Decode the LZMA2 filter properties (a single byte encoding the dictionary
# size) and map the dictionary size back to candidate preset levels.
#
#   $props - the raw properties string, expected to be exactly one byte
#
# Returns {id => 'lzma2', presets => [...]}.
# Dies if the properties are not one byte long, if the encoded size is out of
# range, or if the dictionary size does not match any known preset.
sub decode_lzma2_props {
    my ($props) = @_ ;
    die "Corrupted xz file\n"
        if (!defined($props) || length($props) != 1) ;
    my $bits = unpack('C', $props) & 0x3f ;
    # see 5.3.1
    die "Unsupported LZMA2 dictionary size bits: $bits\n" if ($bits > 40) ;
    my $dict_size ;
    if ($bits == 40) {
        $dict_size = 0xFFFFFFFF ;
    } else {
        # Mantissa is 2 or 3 (low bit of $bits), exponent is $bits / 2 + 11
        # using integer division.
        $dict_size = (2 | ($bits & 1)) << (($bits >> 1) + 11) ;
    }
    my $presets = $lzma2_preset_from_dict_size_of{$dict_size} ;
    die "Unknown dict size: $dict_size\n" if (!defined($presets)) ;
    return {id => 'lzma2', presets => $presets} ;
}

# Dispatch table: filter ID => sub decoding the properties of that filter.
my %props_decoder_of = (0x21 => \&decode_lzma2_props) ;

# Decode the raw properties $props of the filter identified by $id, using the
# decoder registered for that ID.  Returns whatever the decoder returns.
# Dies when no decoder is registered for $id.
sub decode_filter_props {
    my ($id, $props) = @_ ;
    my $decode = $props_decoder_of{$id} ;
    if (!defined($decode)) {
        die "$id is not a known filter\n" ;
    }
    return $decode->($props) ;
}

# Parse a block header starting at the current position of $in.
#
#   $in    - file handle positioned on the first byte of the block header
#   $check - check name of the stream; accepted for the callers' convenience
#            but not used here
#
# Reads the header size, the block flags, the optional compressed and
# uncompressed size fields and the list of filter flags.  The header padding
# and the header CRC32 are not read.
#
# Returns [$size, $filters] where $size is the real header size in bytes and
# $filters is a reference to a list of [$filter_id, $decoded_props].
# Dies on truncated or malformed input and on unknown filters.
sub parse_block_header {
    my ($in, $check) = @_ ;
    my $buf ;
    checked_read($in, \$buf, 1, "Corrupted xz file\n") ;
    my $encoded_size = unpack('C', $buf) ;
    # 0x00 here is the index indicator: the stream contains no block at all.
    die "No block found in xz file\n" if ($encoded_size == 0) ;
    my $size = ($encoded_size + 1) * 4 ; # see 3.1.1
    checked_read($in, \$buf, 1, "Corrupted xz file\n") ;
    my $flags = unpack('C', $buf) ;
    my $nb_filters = ($flags & 0x03) + 1 ; # 2 bits used to encode 1-4 values
    my $must_be_zero = $flags & 0x3C ;
    die "Reserved block flags non-zero\n" if ($must_be_zero != 0) ;
    my $compressed_size_present = $flags & 0x40 ;
    my $uncompressed_size_present = $flags & 0x80 ;
    warn "flags: ".Dumper([$nb_filters, $must_be_zero,
                           $compressed_size_present,
                           $uncompressed_size_present])."\n"
                               if (DEBUG >= 2) ;

    # The optional size fields come right after the flags, compressed size
    # first, each one stored as a multibyte integer.  They are only consumed
    # so that the filter flags are read from the right offset.
    if ($compressed_size_present) {
        my ($compressed_size) = read_multibyte_integer($in, 9) ;
        die "Corrupted xz file\n" if (!defined($compressed_size)) ;
        warn "Compressed size: $compressed_size\n" if (DEBUG >= 2) ;
    }
    if ($uncompressed_size_present) {
        my ($uncompressed_size) = read_multibyte_integer($in, 9) ;
        die "Corrupted xz file\n" if (!defined($uncompressed_size)) ;
        warn "Uncompressed size: $uncompressed_size\n" if (DEBUG >= 2) ;
    }

    my $filters = [] ;
    for (1 .. $nb_filters) {
        my ($id, $id_lg) = read_multibyte_integer($in, 9) ;
        # read_multibyte_integer signals a bad encoding with undef.
        die "Corrupted xz file\n" if (!defined($id)) ;
        warn "Id: $id, lg: $id_lg\n" if (DEBUG >= 2) ;
        my ($prop_size, $prop_size_lg) = read_multibyte_integer($in, 9) ;
        die "Corrupted xz file\n" if (!defined($prop_size)) ;
        warn "Size: $prop_size, lg: $prop_size_lg\n" if (DEBUG >= 2) ;
        my $props ;
        checked_read($in, \$props, $prop_size, "Corrupted xz file\n") ;
        warn "Props: $props\n" if (DEBUG >= 2) ;
        my $decoded_props = decode_filter_props($id, $props) ;
        push @$filters, [$id, $decoded_props] ;
    }
    return [$size, $filters] ;
}

# Parse the index of the stream.
#
#   $in            - file handle; its current position must be the beginning
#                    of the stream footer, since the index is reached by
#                    seeking $backward_size bytes backwards from there
#   $backward_size - real size in bytes of the index
#
# Returns [$CRC32, $records] where $records is a reference to a list of
# [$unpadded_size, $uncompressed_size], one entry per record.  The CRC32 is
# returned as read; it is not verified here.
# Dies on seek failure and on truncated or malformed input.
sub parse_indices {
    my ($in, $backward_size) = @_ ;
    seek($in, - $backward_size, SEEK_CUR) or die "Cannot seek to index - $!\n";
    my $buf ;
    checked_read($in, \$buf, 1, "Corrupted xz file\n") ;
    my $indicator = unpack('C', $buf) ;
    die sprintf("No index indicator (0x%02X)\n", $indicator)
        if ($indicator != 0x00) ;
    my ($nb_records, $lg) = read_multibyte_integer($in, 9) ;
    # read_multibyte_integer signals a bad encoding with undef.
    die "Corrupted xz file\n" if (!defined($nb_records)) ;
    warn "nb records: $nb_records\n" if (DEBUG >= 3) ;
    # Bytes consumed so far: the indicator plus the number of records.
    my $total_lg = 1 + $lg ;
    my $records = [] ;
    my $remaining_records = $nb_records ;
    while ($remaining_records)
    {
        my ($unpadded, $uncompressed) ;
        ($unpadded, $lg) = read_multibyte_integer($in, 9) ;
        die "Corrupted xz file\n" if (!defined($unpadded)) ;
        warn "unpadded: $unpadded ($lg)\n" if (DEBUG >= 4) ;
        $total_lg += $lg ;
        ($uncompressed, $lg) = read_multibyte_integer($in, 9) ;
        die "Corrupted xz file\n" if (!defined($uncompressed)) ;
        warn "uncompressed: $uncompressed: ($lg)\n" if (DEBUG >= 4) ;
        $total_lg += $lg ;
        push @$records, [$unpadded, $uncompressed] ;
        $remaining_records-- ;
    }
    # The index is padded with 0-3 null bytes up to a multiple of four bytes
    # before its CRC32; skip them so the CRC32 is read from the right place.
    my $padding = (4 - $total_lg % 4) % 4 ;
    if ($padding) {
        checked_read($in, \$buf, $padding, "Corrupted xz file\n") ;
        die "Corrupted xz file\n" if ($buf =~ /[^\0]/) ;
    }
    checked_read($in, \$buf, 4, "Corrupted xz file\n") ;
    my $CRC32 = unpack('V', $buf) ;
    return [$CRC32, $records] ;
}

# Parse the 12 bytes of the stream footer located at the very end of $in.
#
# Returns [$CRC32, $backward_size, $check] where $backward_size is already
# converted to a real size in bytes and $check is the decoded check name.
# On return the file position is the beginning of the footer.
# Dies when the file size is not a multiple of four, on truncated input, on
# invalid stream flags or on wrong footer magic bytes.
sub parse_stream_footer {
    my ($in) = @_ ;
    seek($in, 0, SEEK_END) or die "Cannot seek  to footer- $!\n";
    my $file_size = tell($in) ;
    die "Corrupted or invalid xz file\n"
        if ($file_size == -1 || $file_size % 4 != 0) ;

    # CRC32 (4) + backward size (4) + stream flags (2) + magic bytes (2)
    my $footer_size = 12 ;
    seek($in, - $footer_size, SEEK_END) ;
    my $raw ;
    checked_read($in, \$raw, $footer_size, "Corrupted xz file\n") ;

    my ($CRC32, $stored_backward_size, $flag1, $flag2, $magic)
        = unpack('V V C C H4', $raw) ;
    my $backward_size = ($stored_backward_size + 1) * 4 ; # see 2.1.2.2
    my $check = decode_stream_flags($flag1, $flag2)->[0] ;
    if ($magic ne '595a') { # aka 'YZ' see 2.1.2.4
        die "Not an xz file\n" ;
    }
    warn "CRC32_f: $CRC32, backward size: $backward_size, check: $check\n"
        if (DEBUG >= 2) ;

    # Leave the handle positioned on the first byte of the footer.
    seek($in, - $footer_size, SEEK_END) ;
    return [$CRC32, $backward_size, $check] ;
}

# Parse the xz file named $fname and print a summary on stdout: the filter
# id, the check name, the candidate presets and the uncompressed size of each
# block listed in the index.
#
# Only the first block header is examined, and only a single filter is
# supported.  Dies on open failure and on any parsing error.
sub parse_xz {
    my ($fname) = @_ ;
    open(my $in, "<", $fname) or die "$fname: $!";
    # xz files are binary data: make sure no I/O layer alters the bytes.
    binmode($in) ;
    warn "Parsing $fname" if (DEBUG >= 1) ;

    my $fhdr = parse_stream_header($in) ;
    my ($check, $CRC32_h) = @$fhdr ;
    warn sprintf("check: %s, CRC32_h: %X\n", $check, $CRC32_h) if (DEBUG >= 2) ;
    my $bhdr = parse_block_header($in, $check) ;
    warn "bhdr:".Dumper($bhdr)."\n" if (DEBUG >= 2) ;
    my $sftr = parse_stream_footer($in) ;
    warn "sftr:".Dumper($sftr)."\n" if (DEBUG >= 2) ;
    my ($CRC32_f, $backward_size, $check_f) = @$sftr ;
    warn sprintf("check: %s, CRC32_f: %X\n", $check_f, $CRC32_f)
        if (DEBUG >= 2) ;
    # The footer carries a copy of the header's stream flags; a mismatch
    # means the file is damaged.
    die "Stream header and footer flags differ\n" if ($check_f ne $check) ;
    # parse_stream_footer left the handle at the start of the footer, which
    # is where parse_indices seeks backwards from.
    my $index = parse_indices($in, $backward_size) ;
    my ($CRC32_i, $indices) = @$index ;
    warn sprintf("CRC32_i: %08X\n", $CRC32_i) if (DEBUG >= 2) ;
    warn "indices:".Dumper($indices)."\n" if (DEBUG >= 2) ;

    my $filters = $bhdr->[1] ;
    warn "filters:".Dumper($filters)."\n" if (DEBUG >= 2) ;
    die "Can't decode more than one filter\n" if ($#$filters > 0) ;
    my $filter = $filters->[0]->[1] ;
    warn "filter:".Dumper($filter)."\n" if (DEBUG >= 2) ;
    printf("Id: %s, check: %s, presets: %s\n", $filter->{id}, $check,
           join('|', @{$filter->{presets}})) ;
    # Second element of each index record is the uncompressed size.
    my @input_block_sizes = map {$_->[1]} @$indices ;
    printf("Block(s): %s\n", join(", ", @input_block_sizes)) ;

    close $in;
}

# Entry point: expects exactly one command line argument, the name of the xz
# file to parse.
sub main {
    die "A single file name is expected\n" unless (scalar(@ARGV) == 1) ;
    parse_xz($ARGV[0]) ;
}

main() ;



-- 
To UNSUBSCRIBE, email to [email protected]
with a subject of "unsubscribe". Trouble? Contact [email protected]

Reply via email to