This patch tries to allow the unzipping of concatenated gzip files.
Test:
% echo First > first
% gzip first
% echo Second > second
% gzip second
% cat first.gz second.gz > total.gz
% ./busybox gunzip total.gz
% cat total
First
Second
%
If there are no errors, this should address Debian bug #402482
<http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=402482>.
The feature costs 400 bytes on i386. I've removed a couple of
initializations that look useless to me. I've also added an optional
feature that prints the message "decompression OK, trailing garbage ignored".
Loïc
Index: archival/Config.in
===================================================================
--- archival/Config.in (revision 20170)
+++ archival/Config.in (working copy)
@@ -114,6 +114,24 @@
archives created by the program compress (not much
used anymore).
+config FEATURE_GUNZIP_TRAILING_GARBAGE
+ bool "\"decompression OK, trailing garbage ignored\" message"
+ default n
+ depends on GUNZIP
+ help
+ Enable if you want gunzip to print the above message
+ when there are trailing bytes after the compressed archive.
+
+config FEATURE_GUNZIP_MULTIPLE
+ bool "Concatenated gzip'ed files"
+ default n
+ depends on GUNZIP
+ help
+ Enable if you want gunzip to handle multiple concatenated
+ gzip'ed files. It will only work with gzip'ed files, not with
+ compressed files, even if you have enabled uncompress
+ support above.
+
config GZIP
bool "gzip"
default n
Index: archival/bbunzip.c
===================================================================
--- archival/bbunzip.c (revision 20170)
+++ archival/bbunzip.c (working copy)
@@ -74,7 +74,7 @@
goto err;
}
/* O_EXCL: "real" bunzip2 doesn't overwrite files */
- /* GNU gunzip goes not bail out, but goes to next file */
+ /* GNU gunzip does not bail out, but goes to next file */
if (open_to_or_warn(STDOUT_FILENO, new_name, O_WRONLY | O_CREAT | O_EXCL,
stat_buf.st_mode))
goto err;
Index: archival/libunarchive/decompress_unzip.c
===================================================================
--- archival/libunarchive/decompress_unzip.c (revision 20170)
+++ archival/libunarchive/decompress_unzip.c (working copy)
@@ -144,8 +144,10 @@
#define inflate_stored_b (S()inflate_stored_b )
#define inflate_stored_k (S()inflate_stored_k )
#define inflate_stored_w (S()inflate_stored_w )
-#define INIT_STATE ({ bytebuffer_size = 0; method = -1; need_another_block = 1; })
+#define REINIT_STATE ({ /*method = -1;*/ need_another_block = 1; })
+#define INIT_STATE ({ bytebuffer_size = 0; bytebuffer = NULL; })
+
/* This is generic part */
#if STATE_IN_BSS /* Use global data segment */
@@ -176,7 +178,10 @@
static state_t* alloc_state(void)
{
state_t* state = xzalloc(sizeof(*state));
+#if 0
+ /* Disabled as redundant: INIT_STATE only stores 0/NULL, and the author relies on xzalloc() returning zeroed memory */
INIT_STATE;
+#endif
return state;
}
#endif
@@ -239,6 +244,102 @@
return bitbuffer;
}
+static int top_up(STATE_PARAM unsigned n)
+{
+ /* Ensure at least n unread bytes follow bytebuffer_offset, reading any shortfall from gunzip_src_fd; returns 0 on success, -1 if full_read() does not deliver exactly the missing bytes */
+ unsigned count = bytebuffer_size - bytebuffer_offset;
+ if (count < n) {
+ n -= count;
+ if (full_read(gunzip_src_fd, &bytebuffer[bytebuffer_size], n) != n)
+ return -1;
+// NOTE(review): nothing here checks bytebuffer_size + n against bytebuffer_max - confirm callers cannot overrun the buffer
+ bytebuffer_size += n;
+ }
+ return 0;
+}
+
+static unsigned short buffer_read_short(STATE_PARAM_ONLY)
+{
+ unsigned short res;
+
+ res = bytebuffer[bytebuffer_offset ] |
+ (bytebuffer[bytebuffer_offset+1] << 8);
+ bytebuffer_offset += 2;
+ return res;
+}
+
+static int internal_check_header_gzip(STATE_PARAM_ONLY)
+{
+ union {
+ unsigned char raw[8];
+ struct {
+ unsigned char gz_method;
+ unsigned char flags;
+ unsigned int mtime;
+ unsigned char xtra_flags;
+ unsigned char os_flags;
+ } formatted;
+ } header;
+
+ /*
+ * Rewind bytebuffer: move the unread bytes to the front of the buffer
+ * and reset the offset. The fixed part of the header read below is 8 bytes.
+ */
+ bytebuffer_size -= bytebuffer_offset;
+ memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size);
+ bytebuffer_offset = 0;
+
+ if (top_up(PASS_STATE 8))
+ return -1;
+ memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8);
+ bytebuffer_offset += 8;
+
+ /* Only method 8 is accepted; only gz_method and flags are read. NOTE(review): later fields may not line up with raw[] because of struct padding - confirm before using them */
+ if (header.formatted.gz_method != 8) {
+ return -1;
+ }
+
+ if (header.formatted.flags & 0x04) {
+ /* bit 2 set: extra field present, prefixed by its 16-bit little-endian length */
+ unsigned extra_short;
+
+ if (top_up(PASS_STATE 2))
+ return -1;
+ extra_short = buffer_read_short(PASS_STATE_ONLY);
+ if (top_up(PASS_STATE extra_short))
+ return -1;
+ /* Skip over the extra field without interpreting it */
+ bytebuffer_offset += extra_short;
+ }
+
+ /* Skip the NUL-terminated original name and/or file comment, if present */
+ /* (the loop below runs once, or twice when both flag bits are set) */
+ if (header.formatted.flags & 0x18) {
+ bool comm = 0;
+ /* bit 3 set: original file name present */
+ /* bit 4 set: file comment present */
+read_string:
+ do {
+ if (top_up(PASS_STATE 1))
+ return -1;
+ }
+ while (bytebuffer[bytebuffer_offset++] != 0);
+ if (comm == 0 && (header.formatted.flags & 0x18) == 0x18)
+ {
+ comm = 1;
+ goto read_string;
+ }
+ }
+
+ /* Skip the 2-byte header checksum (flag bit 1); it is not verified */
+ if (header.formatted.flags & 0x02) {
+ if (top_up(PASS_STATE 2))
+ return -1;
+ bytebuffer_offset += 2;
+ }
+ return 0;
+}
+
/*
* Free the malloc'ed tables built by huft_build(), which makes a linked
* list of the tables it made, with the links in a dummy first entry of
@@ -952,7 +1053,7 @@
if (end_reached) {
calculate_gunzip_crc(PASS_STATE_ONLY);
end_reached = 0;
- need_another_block = 1;
+ /*need_another_block = 1;*/
return 0; /* Last block */
}
method = inflate_block(PASS_STATE &end_reached);
@@ -989,6 +1090,8 @@
USE_DESKTOP(long long) int n = 0;
ssize_t nwrote;
+ REINIT_STATE;
+
/* Allocate all global buffers (for DYN_ALLOC option) */
gunzip_window = xmalloc(GUNZIP_WSIZE);
gunzip_outbuf_count = 0;
@@ -1003,8 +1106,11 @@
gunzip_crc_table = crc32_filltable(NULL, 0);
gunzip_crc = ~0;
- /* Allocate space for buffer */
- bytebuffer = xmalloc(bytebuffer_max);
+ if (!ENABLE_FEATURE_GUNZIP_MULTIPLE || bytebuffer == NULL)
+ {
+ /* Allocate space for buffer */
+ bytebuffer = xmalloc(bytebuffer_max);
+ }
while (1) {
int r = inflate_get_next_window(PASS_STATE_ONLY);
@@ -1044,7 +1150,8 @@
ALLOC_STATE;
bytebuffer_max = bufsize + 8;
- bytebuffer_offset = 4;
+ //bytebuffer_offset = 4;
+
n = inflate_unzip_internal(PASS_STATE in, out);
res->crc = gunzip_crc;
@@ -1058,46 +1165,79 @@
USE_DESKTOP(long long) int
unpack_gz_stream(int in, int out)
{
- uint32_t stored_crc = 0;
+ uint32_t stored_crc;
unsigned count;
- USE_DESKTOP(long long) int n;
+#if ENABLE_FEATURE_GUNZIP_MULTIPLE
+ USE_DESKTOP(long long) int nincr;
+#else
+#define nincr n
+#endif
+ USE_DESKTOP(long long) int n = 0;
+ int magic;
DECLARE_STATE;
ALLOC_STATE;
bytebuffer_max = 0x8000;
- n = inflate_unzip_internal(PASS_STATE in, out);
+ do {
+ nincr = inflate_unzip_internal(PASS_STATE in, out);
- if (n < 0) goto ret;
+ if (ENABLE_FEATURE_GUNZIP_MULTIPLE && nincr < 0)
+ {
+ if (n == 0)
+ n = nincr;
+ goto ret;
+ }
- /* top up the input buffer with the rest of the trailer */
- count = bytebuffer_size - bytebuffer_offset;
- if (count < 8) {
- xread(in, &bytebuffer[bytebuffer_size], 8 - count);
-//shouldn't we propagate error?
- bytebuffer_size += 8 - count;
- }
- for (count = 0; count != 4; count++) {
- stored_crc |= (bytebuffer[bytebuffer_offset] << (count * 8));
- bytebuffer_offset++;
- }
+ /* top up the input buffer with the rest of the trailer */
+ if (top_up(PASS_STATE 8))
+ {
+ if (!ENABLE_FEATURE_GUNZIP_MULTIPLE || n == 0)
+ bb_error_msg_and_die("short read");
+ else
+ bb_error_msg("short read");
+ goto ret;
+ }
+ stored_crc = buffer_read_short(PASS_STATE_ONLY);
+ stored_crc |= ((unsigned)buffer_read_short(PASS_STATE_ONLY)) << 16;
- /* Validate decompression - crc */
- if (stored_crc != (~gunzip_crc)) {
- bb_error_msg("crc error");
- n = -1;
- goto ret;
- }
+ /* Validate decompression - crc */
+ if (stored_crc != (~gunzip_crc)) {
+ if (!ENABLE_FEATURE_GUNZIP_MULTIPLE || n == 0)
+ bb_error_msg_and_die("crc error");
+ else
+ bb_error_msg("crc error");
+ goto ret;
+ }
- /* Validate decompression - size */
- if (gunzip_bytes_out !=
- (bytebuffer[bytebuffer_offset] | (bytebuffer[bytebuffer_offset+1] << 8) |
- (bytebuffer[bytebuffer_offset+2] << 16) | (bytebuffer[bytebuffer_offset+3] << 24))
- ) {
- bb_error_msg("incorrect length");
- n = -1;
- }
+ /* Validate decompression - size */
+ count = buffer_read_short(PASS_STATE_ONLY);
+ count |= ((unsigned)buffer_read_short(PASS_STATE_ONLY)) << 16;
+ if (gunzip_bytes_out != count) {
+ bb_error_msg("incorrect length");
+ if (ENABLE_FEATURE_GUNZIP_MULTIPLE && n == 0)
+ n = -1;
+ goto ret;
+ }
+
+#if ENABLE_FEATURE_GUNZIP_TRAILING_GARBAGE
+ /* costs 77 bytes */
+ magic = -1;
+#endif
+ if (ENABLE_FEATURE_GUNZIP_MULTIPLE) {
+ n += nincr;
+ if (top_up(PASS_STATE 2))
+ break;
+ magic = buffer_read_short(PASS_STATE_ONLY);
+ }
+ } while (ENABLE_FEATURE_GUNZIP_MULTIPLE && magic == 0x8b1f &&
+ !internal_check_header_gzip(PASS_STATE_ONLY));
ret:
+#if ENABLE_FEATURE_GUNZIP_TRAILING_GARBAGE
+ /* costs 77 bytes */
+ if (magic >= 0 || bytebuffer_size > bytebuffer_offset)
+ bb_error_msg("decompression OK, trailing garbage ignored");
+#endif
free(bytebuffer);
DEALLOC_STATE;
return n;
_______________________________________________
busybox mailing list
[email protected]
http://busybox.net/cgi-bin/mailman/listinfo/busybox