On Thu, 13 Oct 2022 14:14:28 -0600, "Todd C. Miller" wrote: > GNU gzip supports uncompressing .zip files that contain a single > member. This can be very convenient when dealing with email > attachments that use zip instead of gzip to compress a single file > (I'm looking at you, Google). Below is a diff to support this with > our gzip. This turned out to be more complicated than I expected > simply due to the number of zip extended header fields. > > If there is more than a single file in the .zip we extract the first > one but leave the original .zip file alone. I made some minor tweaks > to main.c to support this. We now only remove the output file if > error is set to FAILURE, not WARNING. This has the side effect > that the output file is no longer removed simply due to a CRC error, > which I think is an improvement.
I found a problem with my previous version reporting crc errors for larger files. That is fixed in the version below. With the latest diff here is the size of distrib/special/gzip on amd64: Old: text data bss dec hex 184591 7776 29104 221471 3611f New: text data bss dec hex 184559 7776 29104 221439 360ff There is now a net decrease in size :-) - todd Index: usr.bin/compress/Makefile =================================================================== RCS file: /cvs/src/usr.bin/compress/Makefile,v retrieving revision 1.22 diff -u -p -u -r1.22 Makefile --- usr.bin/compress/Makefile 30 Mar 2016 06:38:45 -0000 1.22 +++ usr.bin/compress/Makefile 13 Oct 2022 19:33:57 -0000 @@ -1,7 +1,7 @@ # $OpenBSD: Makefile,v 1.22 2016/03/30 06:38:45 jmc Exp $ PROG= compress -SRCS= main.c zopen.c gzopen.c nullopen.c +SRCS= main.c zopen.c gzopen.c zipopen.c nullopen.c MAN= compress.1 gzexe.1 gzip.1 zdiff.1 zforce.1 zmore.1 znew.1 LINKS= ${BINDIR}/compress ${BINDIR}/uncompress \ ${BINDIR}/compress ${BINDIR}/zcat \ Index: usr.bin/compress/compress.h =================================================================== RCS file: /cvs/src/usr.bin/compress/compress.h,v retrieving revision 1.14 diff -u -p -u -r1.14 compress.h --- usr.bin/compress/compress.h 18 Jan 2021 00:46:58 -0000 1.14 +++ usr.bin/compress/compress.h 13 Oct 2022 19:34:49 -0000 @@ -65,13 +65,16 @@ extern int zread(void *, char *, int); extern int zwrite(void *, const char *, int); extern int z_close(void *, struct z_info *, const char *, struct stat *); - extern void *gz_ropen(int, char *, int); extern void *gz_wopen(int, char *, int, u_int32_t); extern int gz_read(void *, char *, int); extern int gz_write(void *, const char *, int); extern int gz_close(void *, struct z_info *, const char *, struct stat *); extern int gz_flush(void *, int); + +extern void *zip_ropen(int, char *, int); +extern int zip_read(void *, char *, int); +extern int zip_close(void *, struct z_info *, const char *, struct stat *); extern void 
*null_ropen(int, char *, int); extern void *null_wopen(int, char *, int, u_int32_t); Index: usr.bin/compress/gzip.1 =================================================================== RCS file: /cvs/src/usr.bin/compress/gzip.1,v retrieving revision 1.15 diff -u -p -u -r1.15 gzip.1 --- usr.bin/compress/gzip.1 14 Mar 2022 21:52:08 -0000 1.15 +++ usr.bin/compress/gzip.1 14 Oct 2022 00:04:57 -0000 @@ -83,10 +83,11 @@ utility restores compressed files to the files by removing the extension (or by using the stored name if the .Fl N flag is specified). -It has the ability to restore files compressed by both -.Nm +It has the ability to restore files compressed by +.Nm , +.Xr compress 1 and -.Xr compress 1 , +.Xr zip 1 , recognising the following extensions: .Dq .Z , .Dq -Z , @@ -99,14 +100,23 @@ recognising the following extensions: .Dq _tgz , .Dq .taz , .Dq -taz , +.Dq _taz , +.Dq .zip , +.Dq -zip and -.Dq _taz . +.Dq _zip . Extensions ending in .Dq tgz and .Dq taz are not removed when decompressing, instead they are converted to .Dq tar . +Files in zip format are only supported if they contain a single member +compressed with either the +.Em deflate +or +.Em store +(uncompressed) method. .Pp The .Nm gzcat Index: usr.bin/compress/main.c =================================================================== RCS file: /cvs/src/usr.bin/compress/main.c,v retrieving revision 1.101 diff -u -p -u -r1.101 main.c --- usr.bin/compress/main.c 29 Aug 2022 19:42:01 -0000 1.101 +++ usr.bin/compress/main.c 14 Oct 2022 00:31:14 -0000 @@ -102,6 +102,20 @@ const struct compressor { zwrite, z_close }, +#define M_UNZIP (&c_table[2]) + { + "unzip", + ".zip", + "PK", + NULL, + "cfhkLlNno:qrtVv", + "fhqr", + zip_ropen, + zip_read, + NULL, + NULL, + zip_close + }, #endif /* SMALL */ { NULL } }; @@ -725,10 +739,21 @@ dodecompress(const char *in, char *out, error = errno == EINVAL ? 
WARNING : FAILURE; } - if (method->close(cookie, &info, NULL, NULL)) { - if (!error && verbose >= 0) - warnx("%s", in); - error = FAILURE; + if (method->close(cookie, &info, NULL, NULL) && !error) { +#ifdef M_UNZIP + if (errno == EEXIST) { + if (verbose >= 0) { + warnx("more than one entry in %s: %s", in, + cat ? "ignoring the rest" : "unchanged"); + } + error = WARNING; + } else +#endif + { + if (verbose >= 0) + warn("%s", in); + error = FAILURE; + } } if (storename && !cat) { if (info.mtime != 0) { @@ -736,10 +761,9 @@ dodecompress(const char *in, char *out, sb->st_atimespec.tv_sec = info.mtime; sb->st_mtimespec.tv_nsec = sb->st_atimespec.tv_nsec = 0; - } else - storename = 0; /* no timestamp to restore */ + } } - if (error == SUCCESS) + if (error != FAILURE) setfile(out, ofd, sb); if (ofd != -1 && close(ofd)) { @@ -748,7 +772,7 @@ dodecompress(const char *in, char *out, error = FAILURE; } - if (!error) { + if (error != FAILURE) { if (list) { if (info.mtime == 0) info.mtime = (u_int32_t)sb->st_mtime; @@ -760,7 +784,7 @@ dodecompress(const char *in, char *out, } /* On error, clean up the file we created but preserve errno. */ - if (error && oreg) + if (error == FAILURE && oreg) unlink(out); return (error); @@ -830,7 +854,7 @@ check_suffix(const char *infile) { int i; char *suf, *sep, *separators = ".-_"; - static char *suffixes[] = { "Z", "gz", "z", "tgz", "taz", NULL }; + static char *suffixes[] = { "Z", "gz", "z", "tgz", "taz", "zip", NULL }; for (sep = separators; *sep != '\0'; sep++) { if ((suf = strrchr(infile, *sep)) == NULL) Index: usr.bin/compress/zipopen.c =================================================================== RCS file: usr.bin/compress/zipopen.c diff -N usr.bin/compress/zipopen.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/compress/zipopen.c 14 Oct 2022 00:20:46 -0000 @@ -0,0 +1,454 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 Todd C. 
Miller <todd.mil...@sudo.ws> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <limits.h> +#include <zlib.h> +#include "compress.h" + +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +/* Signatures for zip file headers we use. */ +#define ZIPMAG 0x4b50 /* first two bytes of the zip signature */ +#define LOCREM 0x0403 /* remaining two bytes in zip signature */ +#define LOCSIG 0x04034b50 /* local file header signature */ +#define EXTSIG 0x08074b50 /* extended local header signature */ + +/* Header sizes. */ +#define LOCHDR 30 /* size of local header, including signature */ +#define EXTHDR 16 /* size of extended local header, inc sig */ + +/* General purpose flag bits. 
*/ +#define CRPFLG 1 /* flag bit for encrypted entry */ +#define EXTFLG 8 /* flag bit for extended local header */ + +/* Extra field definitions */ +#define EF_ZIP64 0x0001 /* zip64 support */ +#define EF_TIME 0x5455 /* mtime, atime, ctime in UTC ("UT") */ +#define EF_IZUNIX 0x5855 /* UNIX extra field ID ("UX") */ + +#define Z_STORED 0 /* Stored uncompressed in .zip */ + +struct zip_state { + z_stream z_stream; /* libz stream */ + uint8_t z_buf[Z_BUFSIZE]; /* I/O buffer */ + uint8_t z_eof; /* set if end of input file */ + uint8_t z_zip64; /* 64-bit file sizes */ + uint16_t z_method; /* Z_DEFLATE or Z_STORED */ + uint16_t z_flags; /* general purpose flags */ + int z_fd; /* zip file descriptor */ + uint32_t z_time; /* timestamp (mtime) */ + uint32_t z_crc; /* crc32 of uncompressed data */ + uint32_t z_ocrc; /* crc32 of uncompressed data (from header) */ + uint32_t z_hlen; /* length of the zip header */ + uint64_t z_ulen; /* uncompressed data length (from header) */ + uint64_t z_total_in; /* # bytes in */ + uint64_t z_total_out; /* # bytes out */ +}; + +static int +get_byte(struct zip_state *s) +{ + if (s->z_eof) + return EOF; + + if (s->z_stream.avail_in == 0) { + ssize_t nread = read(s->z_fd, s->z_buf, Z_BUFSIZE); + if (nread <= 0) { + s->z_eof = 1; + return EOF; + } + s->z_stream.avail_in = nread; + s->z_stream.next_in = s->z_buf; + } + s->z_stream.avail_in--; + return *s->z_stream.next_in++; +} + +static uint16_t +get_uint16(struct zip_state *s) +{ + uint16_t x; + + x = ((uint16_t)(get_byte(s) & 0xff)); + x |= ((uint16_t)(get_byte(s) & 0xff))<<8; + return x; +} + +static uint32_t +get_uint32(struct zip_state *s) +{ + uint32_t x; + + x = ((uint32_t)(get_byte(s) & 0xff)); + x |= ((uint32_t)(get_byte(s) & 0xff))<<8; + x |= ((uint32_t)(get_byte(s) & 0xff))<<16; + x |= ((uint32_t)(get_byte(s) & 0xff))<<24; + return x; +} + +static uint64_t +get_uint64(struct zip_state *s) +{ + uint64_t x; + + x = ((uint64_t)(get_byte(s) & 0xff)); + x |= ((uint64_t)(get_byte(s) & 
0xff))<<8; + x |= ((uint64_t)(get_byte(s) & 0xff))<<16; + x |= ((uint64_t)(get_byte(s) & 0xff))<<24; + x |= ((uint64_t)(get_byte(s) & 0xff))<<32; + x |= ((uint64_t)(get_byte(s) & 0xff))<<40; + x |= ((uint64_t)(get_byte(s) & 0xff))<<48; + x |= ((uint64_t)(get_byte(s) & 0xff))<<56; + return x; +} + +static int +get_header(struct zip_state *s, char *name, int gotmagic) +{ + int c, got_mtime = 0; + uint16_t namelen, extlen; + uint32_t sig; + + /* Check the zip local file header signature. */ + if (!gotmagic) { + sig = get_uint32(s); + if (sig != LOCSIG) { + errno = EFTYPE; + return -1; + } + } else { + sig = get_uint16(s); + if (sig != LOCREM) { + errno = EFTYPE; + return -1; + } + } + + /* Read the local header fields. */ + get_uint16(s); /* min version */ + s->z_flags = get_uint16(s); /* general purpose flags */ + s->z_method = get_uint16(s); /* compression method */ + get_uint32(s); /* DOS format mtime */ + s->z_ocrc = get_uint32(s); /* 32-bit CRC */ + get_uint32(s); /* compressed size */ + s->z_ulen = get_uint32(s); /* uncompressed size */ + namelen = get_uint16(s); /* file name length */ + extlen = get_uint16(s); /* length of extra fields */ + s->z_hlen = LOCHDR; + + /* Encrypted files not supported. */ + if (s->z_flags & CRPFLG) { + errno = EFTYPE; + return -1; + } + + /* Supported compression methods are deflate and store. */ + if (s->z_method != Z_DEFLATED && s->z_method != Z_STORED) { + errno = EFTYPE; + return -1; + } + + /* Store the original file name if present. */ + if (namelen != 0 && name != NULL) { + const char *ep = name + PATH_MAX - 1; + for (; namelen > 0; namelen--) { + if ((c = get_byte(s)) == EOF) + break; + s->z_hlen++; + if (c == '\0') + break; + if (name < ep) + *name++ = c; + } + *name = '\0'; + } + + /* Parse extra fields, if any. 
*/ + while (extlen >= 4) { + uint16_t sig; + int fieldlen; + + sig = get_uint16(s); + fieldlen = get_uint16(s); + s->z_hlen += 4; + extlen -= 4; + + switch (sig) { + case EF_ZIP64: + /* 64-bit file sizes */ + s->z_zip64 = 1; + if (fieldlen >= 8) { + s->z_ulen = get_uint64(s); + s->z_hlen += 8; + extlen -= 8; + fieldlen -= 8; + } + break; + case EF_TIME: + /* UTC timestamps */ + if ((c = get_byte(s)) == EOF) + break; + s->z_hlen++; + extlen--; + fieldlen--; + if (c & 1) { + got_mtime = 1; + s->z_time = get_uint32(s); + s->z_hlen += 4; + extlen -= 4; + fieldlen -= 4; + } + break; + case EF_IZUNIX: + /* We prefer EF_TIME if it is present. */ + if (got_mtime) + break; + + /* skip atime, store mtime. */ + (void)get_uint32(s); + s->z_time = get_uint32(s); + s->z_hlen += 8; + extlen -= 8; + fieldlen -= 8; + break; + default: + break; + } + + /* Consume any unparsed bytes in the field. */ + for (; fieldlen > 0; fieldlen--) { + if (get_byte(s) == EOF) + break; + s->z_hlen++; + extlen--; + } + } + for (; extlen > 0; extlen--) { + if (get_byte(s) == EOF) + break; + s->z_hlen++; + } + + return 0; +} + +void * +zip_ropen(int fd, char *name, int gotmagic) +{ + struct zip_state *s; + + if (fd < 0) + return NULL; + + if ((s = calloc(1, sizeof(*s))) == NULL) + return NULL; + + s->z_fd = fd; + s->z_crc = crc32(0, NULL, 0); + + /* Request a raw inflate, there is no zlib/gzip header present. */ + if (inflateInit2(&s->z_stream, -MAX_WBITS) != Z_OK) { + free(s); + return NULL; + } + s->z_stream.next_in = s->z_buf; + s->z_stream.avail_out = sizeof(s->z_buf); + + /* Read the zip header. */ + if (get_header(s, name, gotmagic) != 0) { + zip_close(s, NULL, NULL, NULL); + s = NULL; + } + + return s; +} + +static int +zip_store(struct zip_state *s) +{ + int error = Z_OK; + uLong copy_len; + + if ((int)s->z_stream.avail_in <= 0) + return s->z_stream.avail_in == 0 ? Z_STREAM_END : Z_DATA_ERROR; + + /* For stored files we rely on z_ulen being set. 
*/ + copy_len = MINIMUM(s->z_stream.avail_out, s->z_stream.avail_in); + if (copy_len >= s->z_ulen - s->z_total_out) { + /* Don't copy past the end of the file. */ + copy_len = s->z_ulen - s->z_total_out; + error = Z_STREAM_END; + } + + memcpy(s->z_stream.next_out, s->z_stream.next_in, copy_len); + s->z_stream.next_out += copy_len; + s->z_stream.avail_out -= copy_len; + s->z_stream.next_in += copy_len; + s->z_stream.avail_in -= copy_len; + s->z_total_in += copy_len; + s->z_total_out += copy_len; + + return error; +} + +int +zip_read(void *cookie, char *buf, int len) +{ + struct zip_state *s = cookie; + Bytef *ubuf = buf; + int error = Z_OK; + + s->z_stream.next_out = ubuf; + s->z_stream.avail_out = len; + + while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) { + if (s->z_stream.avail_in == 0) { + ssize_t nread = read(s->z_fd, s->z_buf, Z_BUFSIZE); + switch (nread) { + case -1: + goto bad; + case 0: + s->z_eof = 1; + continue; + default: + s->z_stream.avail_in = nread; + s->z_stream.next_in = s->z_buf; + } + } + + if (s->z_method == Z_DEFLATED) { + /* + * Prevent overflow of z_stream.total_{in,out} + * which may be 32-bit. + */ + uLong prev_total_in = s->z_stream.total_in; + uLong prev_total_out = s->z_stream.total_out; + error = inflate(&s->z_stream, Z_NO_FLUSH); + s->z_total_in += s->z_stream.total_in - prev_total_in; + s->z_total_out += s->z_stream.total_out - prev_total_out; + } else { + /* File stored uncompressed. */ + error = zip_store(s); + } + } + + switch (error) { + case Z_OK: + s->z_crc = crc32(s->z_crc, ubuf, + (uInt)(s->z_stream.next_out - ubuf)); + break; + case Z_STREAM_END: + s->z_eof = 1; + + /* + * Check CRC and original size. + * These may be found in the local header or, if + * EXTFLG is set, immediately following the file. 
+ */ + s->z_crc = crc32(s->z_crc, ubuf, + (uInt)(s->z_stream.next_out - ubuf)); + + if (s->z_flags & EXTFLG) { + /* + * Read data descriptor: + * signature 0x08074b50: 4 bytes + * CRC-32: 4 bytes + * compressed size: 4 or 8 bytes + * uncompressed size: 4 or 8 bytes + */ + get_uint32(s); + s->z_ocrc = get_uint32(s); + if (s->z_zip64) { + get_uint64(s); + s->z_ulen = get_uint64(s); + s->z_hlen += 8; + } else { + get_uint32(s); + s->z_ulen = get_uint32(s); + } + s->z_hlen += EXTHDR; + } + if (s->z_ulen != s->z_total_out) { + errno = EIO; + goto bad; + } + if (s->z_ocrc != s->z_crc) { + errno = EINVAL; + goto bad; + } + break; + case Z_DATA_ERROR: + errno = EINVAL; + goto bad; + case Z_BUF_ERROR: + errno = EIO; + goto bad; + default: + goto bad; + } + + return len - s->z_stream.avail_out; +bad: + return -1; +} + +int +zip_close(void *cookie, struct z_info *info, const char *name, struct stat *sb) +{ + struct zip_state *s = cookie; + int error = 0; + + if (s == NULL) { + errno = EINVAL; + return -1; + } + + if (info != NULL) { + info->mtime = s->z_time; + info->crc = s->z_crc; + info->hlen = s->z_hlen; + info->total_in = s->z_total_in; + info->total_out = s->z_total_out; + } + + if (s->z_stream.state != NULL) { + /* inflateEnd() overwrites errno. */ + (void)inflateEnd(&s->z_stream); + } + + /* + * Check for the presence of additional files in the .zip. + * Do not remove the original if we cannot extract all the files. + */ + s->z_eof = 0; + if (get_header(s, NULL, 0) == 0) { + errno = EEXIST; + error = -1; + } + + (void)close(s->z_fd); + + free(s); + + return error; +}