#!/usr/bin/perl -w
# Proof of concept code for a pure perl gunzip
# Author: Ton Hospel
# GPL or Perl Artistic, your choice

# About 250 times slower than real gunzip....

use 5.006_001;

use strict;
use Time::Local;

use constant FTEXT	=> 1;
use constant FHCRC	=> 2;
use constant FEXTRA	=> 4;
use constant FNAME	=> 8;
use constant FCOMMENT	=> 16;

{
    my ($fin,$crc16,@crc32, @lit_base, @dist_base, $bits_val, $bits_have);
    my $base_time;

    # get arg bytes
    sub r {
        my $rc = read($fin, my $result, $_[0]);
        if ($rc) {
            die "Unexpected short read" if $rc != shift;
            return $result;
        }
        die "Unexpected EOF" if defined($rc);
        die "Read error: $!";
    }

    # get null-terminated string
    sub z {
        local $/ = "\0";
        defined(my $result = <$fin>) || die "Unexpected EOF";
        $crc16 = $crc16>>8^$crc32[$crc16&0xff^ord(substr $result,$_,1)]
            for 0..length($result)-1;
        chomp($result) || die "Unexpected short read";
        return $result;
    }

    my @lit_extra = (-1,
                     0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,
                     3,3,3,3,4,4,4,4,5,5,5,5,0,-2,-2);
    my @dist_extra = (0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,
                      9,9,10,10,11,11,12,12,13,13,-1,-1);
    my @alpha_map = (16, 17, 18, 0, 8, 7, 9, 6, 10,
                    5, 11, 4, 12, 3, 13, 2, 14, 1, 15);
    sub prepare_tables {
        my $length = 3;
        for (@lit_extra) {
            push @lit_base, $length;
            $length += 1 << $_ if $_ >= 0;
        }
        # Exceptional case
        splice(@lit_base, -3, 3, 258);

        my $dist = 1;
        for (@dist_extra) {
            push @dist_base, $dist;
            $dist += 1 << $_ if $_ >= 0;
        }
        splice(@dist_base, -2, 2);
    }

    # get arg bits (little endian)
    sub b {
        while ($_[0] > $bits_have) {
            $bits_val |= ord(r(1)) << $bits_have;
            $bits_have += 8;
        }
        my $result = $bits_val & (1 << $_[0])-1;
        $bits_val >>= $_[0];
        $bits_have -= shift;
        return $result;
    }

    # Get one huffman code
    sub h {
        my $code = shift;
        my $str = "";
        do {
            if (--$bits_have < 0) {
                my $c = getc($fin);
                if (!defined($c)) {
                    die "Read error: $!" if $!;
                    die "Unexpected EOF\n";
                }
                $bits_val = ord($c);
                $bits_have = 7;
            }
            $str .= $bits_val & 1;
            $bits_val >>= 1;
        } until exists $code->{$str};
        defined($code->{$str}) || die "Bad code $str";
        return $code->{$str};
    }

    # construct huffman code
    sub make_huffman {
        my $counts = shift;
        my (%code, @counts);
        push @{$counts[$counts->[$_]]}, $_ for 0..$#$counts;
        my $value = 0;
        my $bits = -1;
        for (@counts) {
            $value *= 2;
            next unless ++$bits && $_;
            $code{sprintf"%0${bits}b", $value++} = $_ for @$_;
        }
        # Close the code to avoid infinite loops (and out of memory)
        $code{sprintf"%0${bits}b", $value++} = undef for
            $value .. (1 << $bits)-1;
        @code{0, 1} = () unless %code;
        return \%code;
    }

    my ($static_lit_code, $static_dist_code);
    sub gunzip_work(**) {
        ($fin, my $fout_control) = @_;
        binmode $fin;
        my $fdo = fileno($fout_control);

        unless (@crc32) {
            my $p=oct reverse sprintf"%032bb0", 0x04C11DB7;
            @crc32 = map{for my$s(0..7) {$_ = $_>>1 ^ ($_&1 && $p)} $_} 0..255;
        }

        my $mtime;
        while (!eof($fin)) {
            $crc16 = 0xffffffff;

            my $header = r(10);
            my ($id1, $id2, $cm, $flags, $mt, $xflags, $os) =
                unpack("CCCCVCC", $header);
            if ($mt) {
                $base_time ||= timegm(0, 0, 0, 1, 0, 70);
                $mtime = $base_time + $mt;
            }
            print STDERR "id=$id1,$id2,cm=$cm,fl=$flags, mtime=$mt(", scalar gmtime($mtime), " UT),xfl=$xflags,os=$os\n";
            $id1 ==  31 || die "First id byte should be 31, not $id1\n";
            $id2 == 139 || die "Second id byte should be 139, not $id2\n";
            $cm  ==   8 || die "Only support deflate (method 8), not method $cm\n";
            die "Unhandled flagbits in $flags\n" if
                $flags & ~(FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT);
            my $extra;
            if ($flags & FEXTRA) {
                $header .= my $h = r(2);
                $header .= $extra = r(unpack("v", $h));
            }
            $crc16 = $crc16>>8^$crc32[$crc16&0xff^ord(substr $header,$_,1)]
                for 0..length($header)-1;
            my $fname   = z() if $flags & FNAME;
            my $comment = z() if $flags & FCOMMENT;
            if ($flags & FHCRC) {
                $crc16 = ($crc16 ^ 0xffffffff) & 0xffff;
                my $wanted_crc16 = unpack("v", r(2));
                print STDERR "crc16=$crc16, wanted_crc16=$wanted_crc16\n";
                $crc16 == $wanted_crc16 ||
                    die "Header CRC16=$crc16, expected $wanted_crc16\n";
            }

            # New open because we want to be able to decide binmode
            # depending on the segment flags
            open(my $fout, ">>&$fdo") || 
                die "Could not dup output handle: $!\n";
            binmode($fout) unless $flags & FTEXT;
            $bits_val = $bits_have = 0;

            my $result = "";
            my $final;
            my $crc32 = 0xffffffff;
            my $isize = 0;
            do {
                $final = b(1);
                my $type = b(2);
                print STDERR "block type $type\n";
                if ($type) {
                    my ($lit_code, $dist_code);
                    prepare_tables() unless @lit_base;

                    if ($type == 1) {
                        $lit_code  = $static_lit_code  ||=
                            make_huffman([(8)x144,(9)x112, (7)x24, (8)x8]);
                        $dist_code = $static_dist_code ||= 
                            make_huffman([(5)x32]);
                    } elsif ($type == 2) {
                        my $hlit  = b(5)+257;
                        my $hdist = b(5)+1;
                        my $hclen = b(4)+4;
                        print STDERR "hlit=$hlit,hdist=$hdist,hclen=$hclen\n";

                        # Determine the code length huffman code
                        my @alpha_code = (0) x @alpha_map;
                        $alpha_code[$alpha_map[$_]] = b(3) for 0..$hclen-1;
                        my $alpha_code = make_huffman(\@alpha_code);

                        # Get lit/length and distance tables
                        my @code_len;
                        while (@code_len < $hlit+$hdist) {
                            my $alpha = h($alpha_code);
                            if ($alpha < 16) {
                                push @code_len, $alpha;
                            } elsif ($alpha == 16) {
                                push @code_len, ($code_len[-1]) x (3+b(2));
                            } elsif ($alpha == 17) {
                                push @code_len, (0) x (3+b(3));
                            } else {
                                push @code_len, (0) x (11+b(7));
                            }
                        }
                        @code_len == $hlit+$hdist || die "too many codes";
                        my @lit_len = splice(@code_len, 0, $hlit);
                        $lit_code  = make_huffman(\@lit_len);
                        $dist_code = make_huffman(\@code_len);
                    } else {
                        die "deflate subtype $type not supported\n";
                    }
                    while (1) {
                        my $lit = h($lit_code);
                        if ($lit < 256) {
                            $result .= chr($lit);
                            $crc32 = $crc32>>8 ^ $crc32[$crc32&0xff^$lit];
                        } else {
                            if ($lit_extra[$lit -= 256] < 0) {
                                die "Invalid literal code" if $lit;
                                last;
                            }
                            my $length = $lit_base[$lit] + 
                                ($lit_extra[$lit] && b($lit_extra[$lit]));

                            my $d = h($dist_code);
                            die "Invalid distance code" if $d >= 30;
                            my $dist = $dist_base[$d] + 
                                ($dist_extra[$d] && b($dist_extra[$d]));
                            $crc32 = $crc32>>8 ^ $crc32[$crc32&0xff^ord(substr($result .= substr($result, -$dist, 1), -1, 1))] for 1..$length;
                        }
                        $isize = ($isize + 32768) % 2**32, print $fout substr($result, 0, 32768, "") while
                            length($result) >= 65536;
                    }
                } else {
                    # Not compressed;
                    $bits_val = $bits_have = 0;
                    my ($len, $nlen) = unpack("vv", r(4));
                    $len == (~$nlen & 0xffff) ||
                        die "$len is not the 1-complement of $nlen";
                    $result .= r($len);
                    $crc32 = $crc32>>8 ^ $crc32[$crc32&0xff^ord(substr($result, $_, 1))] for -$len..-1;
                    $isize = ($isize + 32768) % 2**32, print $fout substr($result, 0, 32768, "") while
                        length($result) >= 65536;
                }
            } until $final;

            $crc32 ^= 0xffffffff;
            $isize = ($isize + length($result)) % 2**32;
            print $fout $result;
            close($fout) || die "Error closing output handle: $!\n";

            my ($wanted_crc32, $wanted_isize) = unpack("VV", r(8));
            print STDERR "crc32=$crc32, wanted_crc32=$wanted_crc32, isize=$isize, wanted_isize=$wanted_isize\n";
            $crc32 == $wanted_crc32 ||
                die "CRC32=$crc32, expected $wanted_crc32\n";
            $isize == $wanted_isize ||
                die "element size=$isize, expected $wanted_isize\n";
        }
        return $mtime;
    }
}

sub gunzip {
    my $to = my $file = shift;
    $to =~ s/\.gz\z//i || die "Filename $to does not end on .gz\n";
    my $new = "$to.new.$$";
    open(my $fout, ">", $new) || die "Could not create $new: $!";
    eval {
        open(my $fin, "<", $file) || die "Could not open $file: $!";
        my ($mtime) = gunzip_work($fin, $fout);
        close $fout;
        utime $mtime, $mtime, $new if $mtime;
        rename($new, $to) || die "Could not rename $new to $to: $!";
    };
    if ($@) {
        close $fout;
        unlink($new) || die "Could not unlink $new: $! after $@";
        die $@;
    }
}

gunzip($_) for @ARGV;
