GNU gzip supports uncompressing .zip files that contain a single
member.  This can be very convenient when dealing with email
attachments that use zip instead of gzip to compress a single file
(I'm looking at you, Google).  Below is a diff to support this with
our gzip.  This turned out to be more complicated than I expected
simply due to the number of zip extended header fields.

If there is more than a single file in the .zip we extract the first
one but leave the original .zip file alone.  I made some minor tweaks
to main.c to support this.  We now only remove the output file if
error is set to FAILURE, not WARNING.  This has the side effect
that the output file is no longer removed simply due to a crc error,
which I think is an improvement.

Comments or OKs?

 - todd

Index: usr.bin/compress/Makefile
===================================================================
RCS file: /cvs/src/usr.bin/compress/Makefile,v
retrieving revision 1.22
diff -u -p -u -r1.22 Makefile
--- usr.bin/compress/Makefile   30 Mar 2016 06:38:45 -0000      1.22
+++ usr.bin/compress/Makefile   13 Oct 2022 19:33:57 -0000
@@ -1,7 +1,7 @@
 #      $OpenBSD: Makefile,v 1.22 2016/03/30 06:38:45 jmc Exp $
 
 PROG=  compress
-SRCS=  main.c zopen.c gzopen.c nullopen.c
+SRCS=  main.c zopen.c gzopen.c zipopen.c nullopen.c
 MAN=   compress.1 gzexe.1 gzip.1 zdiff.1 zforce.1 zmore.1 znew.1
 LINKS= ${BINDIR}/compress ${BINDIR}/uncompress \
        ${BINDIR}/compress ${BINDIR}/zcat \
Index: usr.bin/compress/compress.h
===================================================================
RCS file: /cvs/src/usr.bin/compress/compress.h,v
retrieving revision 1.14
diff -u -p -u -r1.14 compress.h
--- usr.bin/compress/compress.h 18 Jan 2021 00:46:58 -0000      1.14
+++ usr.bin/compress/compress.h 13 Oct 2022 19:34:49 -0000
@@ -65,13 +65,16 @@ extern int zread(void *, char *, int);
 extern int zwrite(void *, const char *, int);
 extern int z_close(void *, struct z_info *, const char *, struct stat *);
 
-
 extern void *gz_ropen(int, char *, int);
 extern void *gz_wopen(int, char *, int, u_int32_t);
 extern int gz_read(void *, char *, int);
 extern int gz_write(void *, const char *, int);
 extern int gz_close(void *, struct z_info *, const char *, struct stat *);
 extern int gz_flush(void *, int);
+
+extern void *zip_ropen(int, char *, int);
+extern int zip_read(void *, char *, int);
+extern int zip_close(void *, struct z_info *, const char *, struct stat *);
 
 extern void *null_ropen(int, char *, int);
 extern void *null_wopen(int, char *, int, u_int32_t);
Index: usr.bin/compress/gzip.1
===================================================================
RCS file: /cvs/src/usr.bin/compress/gzip.1,v
retrieving revision 1.15
diff -u -p -u -r1.15 gzip.1
--- usr.bin/compress/gzip.1     14 Mar 2022 21:52:08 -0000      1.15
+++ usr.bin/compress/gzip.1     13 Oct 2022 19:42:41 -0000
@@ -83,10 +83,11 @@ utility restores compressed files to the
 files by removing the extension (or by using the stored name if the
 .Fl N
 flag is specified).
-It has the ability to restore files compressed by both
-.Nm
+It has the ability to restore files compressed by
+.Nm ,
+.Xr compress 1
 and
-.Xr compress 1 ,
+.Xr zip 1 ,
 recognising the following extensions:
 .Dq .Z ,
 .Dq -Z ,
@@ -99,14 +100,23 @@ recognising the following extensions:
 .Dq _tgz ,
 .Dq .taz ,
 .Dq -taz ,
+.Dq _taz ,
+.Dq .zip ,
+.Dq -zip
 and
-.Dq _taz .
+.Dq _zip .
 Extensions ending in
 .Dq tgz
 and
 .Dq taz
 are not removed when decompressing, instead they are converted to
 .Dq tar .
+Files in zip format are only supported if they contain a single member
+compressed with either the
+.Em deflate
+or
+.Em store
+(uncompressed) method.
 .Pp
 The
 .Nm gzcat
Index: usr.bin/compress/main.c
===================================================================
RCS file: /cvs/src/usr.bin/compress/main.c,v
retrieving revision 1.101
diff -u -p -u -r1.101 main.c
--- usr.bin/compress/main.c     29 Aug 2022 19:42:01 -0000      1.101
+++ usr.bin/compress/main.c     13 Oct 2022 19:44:45 -0000
@@ -102,6 +102,19 @@ const struct compressor {
                zwrite,
                z_close
        },
+       {
+               "unzip",
+               ".zip",
+               "PK",
+               NULL,
+               "cfhkLlNno:qrtVv",
+               "fhqr",
+               zip_ropen,
+               zip_read,
+               NULL,
+               NULL,
+               zip_close
+       },
 #endif /* SMALL */
   { NULL }
 };
@@ -725,10 +738,18 @@ dodecompress(const char *in, char *out, 
                error = errno == EINVAL ? WARNING : FAILURE;
        }
 
-       if (method->close(cookie, &info, NULL, NULL)) {
-               if (!error && verbose >= 0)
-                       warnx("%s", in);
-               error = FAILURE;
+       if (method->close(cookie, &info, NULL, NULL) && !error) {
+               if (errno == EEXIST) {
+                       if (verbose >= 0) {
+                               warnx("more than one entry in %s: %s", in,
+                                   cat ? "ignoring the rest" : "unchanged");
+                       }
+                       error = WARNING;
+               } else {
+                       if (verbose >= 0)
+                               warn("%s", in);
+                       error = FAILURE;
+               }
        }
        if (storename && !cat) {
                if (info.mtime != 0) {
@@ -739,7 +760,7 @@ dodecompress(const char *in, char *out, 
                } else
                        storename = 0;          /* no timestamp to restore */
        }
-       if (error == SUCCESS)
+       if (error != FAILURE)
                setfile(out, ofd, sb);
 
        if (ofd != -1 && close(ofd)) {
@@ -748,7 +769,7 @@ dodecompress(const char *in, char *out, 
                error = FAILURE;
        }
 
-       if (!error) {
+       if (error != FAILURE) {
                if (list) {
                        if (info.mtime == 0)
                                info.mtime = (u_int32_t)sb->st_mtime;
@@ -760,7 +781,7 @@ dodecompress(const char *in, char *out, 
        }
 
        /* On error, clean up the file we created but preserve errno. */
-       if (error && oreg)
+       if (error == FAILURE && oreg)
                unlink(out);
 
        return (error);
@@ -830,7 +851,7 @@ check_suffix(const char *infile)
 {
        int i;
        char *suf, *sep, *separators = ".-_";
-       static char *suffixes[] = { "Z", "gz", "z", "tgz", "taz", NULL };
+       static char *suffixes[] = { "Z", "gz", "z", "tgz", "taz", "zip", NULL };
 
        for (sep = separators; *sep != '\0'; sep++) {
                if ((suf = strrchr(infile, *sep)) == NULL)
Index: usr.bin/compress/zipopen.c
===================================================================
RCS file: usr.bin/compress/zipopen.c
diff -N usr.bin/compress/zipopen.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ usr.bin/compress/zipopen.c  13 Oct 2022 20:06:33 -0000
@@ -0,0 +1,447 @@
+/*     $OpenBSD$       */
+
+/*
+ * Copyright (c) 2022 Todd C. Miller <todd.mil...@sudo.ws>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <limits.h>
+#include <zlib.h>
+#include "compress.h"
+
+#define MINIMUM(a, b)  (((a) < (b)) ? (a) : (b))
+
+/* Signatures for zip file headers we use. */
+#define ZIPMAG 0x4b50          /* first two bytes of the zip signature */
+#define LOCREM 0x0403          /* remaining two bytes in zip signature */
+#define LOCSIG 0x04034b50      /* local file header signature */
+#define EXTSIG 0x08074b50      /* extended local header signature */
+
+/* Header sizes. */
+#define LOCHDR 30              /* size of local header, including signature */
+#define EXTHDR 16              /* size of extended local header, inc sig */
+
+/* General purpose flag bits. */
+#define CRPFLG 1               /* flag bit for encrypted entry */
+#define EXTFLG 8               /* flag bit for extended local header */
+
+/* Extra field definitions */
+#define        EF_ZIP64        0x0001  /* zip64 support */
+#define        EF_TIME         0x5455  /* mtime, atime, ctime in UTC ("UT") */
+#define EF_IZUNIX      0x5855  /* UNIX extra field ID ("UX") */
+
+#define Z_STORED 0             /* Stored uncompressed in .zip */
+
+struct zip_state {
+       z_stream z_stream;      /* libz stream */
+       uint8_t  z_buf[Z_BUFSIZE]; /* I/O buffer */
+       uint8_t  z_eof;         /* set if end of input file */
+       uint8_t  z_zip64;       /* set if a zip64 extra field was seen */
+       uint16_t z_method;      /* Z_DEFLATED or Z_STORED */
+       uint16_t z_flags;       /* general purpose flags */
+       int      z_fd;          /* zip file descriptor */
+       uint32_t z_time;        /* timestamp (mtime) */
+       uint32_t z_crc;         /* crc32 of uncompressed data (computed) */
+       uint32_t z_ocrc;        /* expected crc32 (from header or data descriptor) */
+       uint32_t z_hlen;        /* length of the zip header */
+       uint64_t z_ulen;        /* expected uncompressed length (header or descriptor) */
+       uint64_t z_total_in;    /* # bytes in */
+       uint64_t z_total_out;   /* # bytes out */
+};
+
+static int
+get_byte(struct zip_state *s)
+{
+       if (s->z_eof)
+               return EOF;
+
+       if (s->z_stream.avail_in == 0) {
+               ssize_t nread = read(s->z_fd, s->z_buf, Z_BUFSIZE);
+               if (nread <= 0) {
+                       s->z_eof = 1;
+                       return EOF;
+               }
+               s->z_stream.avail_in = nread;
+               s->z_stream.next_in = s->z_buf;
+       }
+       s->z_stream.avail_in--;
+       return *s->z_stream.next_in++;
+}
+
+static uint16_t
+get_uint16(struct zip_state *s)
+{
+       uint16_t x;
+
+       x  = ((uint16_t)(get_byte(s) & 0xff));
+       x |= ((uint16_t)(get_byte(s) & 0xff))<<8;
+       return x;
+}
+
+static uint32_t
+get_uint32(struct zip_state *s)
+{
+       uint32_t x;
+
+       x  = ((uint32_t)(get_byte(s) & 0xff));
+       x |= ((uint32_t)(get_byte(s) & 0xff))<<8;
+       x |= ((uint32_t)(get_byte(s) & 0xff))<<16;
+       x |= ((uint32_t)(get_byte(s) & 0xff))<<24;
+       return x;
+}
+
+static uint64_t
+get_uint64(struct zip_state *s)
+{
+       uint64_t x;
+
+       x  = ((uint64_t)(get_byte(s) & 0xff));
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<8;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<16;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<24;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<32;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<40;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<48;
+       x |= ((uint64_t)(get_byte(s) & 0xff))<<56;
+       return x;
+}
+
+static int
+get_header(struct zip_state *s, char *name, int gotmagic)
+{
+       int c, got_mtime = 0;
+       uint16_t namelen, extlen;
+       uint32_t sig;
+
+       /* Check the zip local file header signature. */
+       if (!gotmagic) {
+               sig = get_uint32(s);
+               if (sig != LOCSIG) {
+                       errno = EFTYPE;
+                       return -1;
+               }
+       } else {
+               sig = get_uint16(s);
+               if (sig != LOCREM) {
+                       errno = EFTYPE;
+                       return -1;
+               }
+       }
+
+       /* Read the local header fields. */
+       get_uint16(s);                  /* min version */
+       s->z_flags = get_uint16(s);     /* general purpose flags */
+       s->z_method = get_uint16(s);    /* compression method */
+       get_uint32(s);                  /* DOS format mtime */
+       s->z_ocrc = get_uint32(s);      /* 32-bit CRC */
+       get_uint32(s);                  /* compressed size */
+       s->z_ulen = get_uint32(s);      /* uncompressed size */
+       namelen = get_uint16(s);        /* file name length */
+       extlen = get_uint16(s);         /* length of extra fields */
+       s->z_hlen = LOCHDR;
+
+       /* Encrypted files not supported. */
+       if (s->z_flags & CRPFLG) {
+               errno = EFTYPE;
+               return -1;
+       }
+
+       /* Supported compression methods are deflate and store. */
+       if (s->z_method != Z_DEFLATED && s->z_method != Z_STORED) {
+               errno = EFTYPE;
+               return -1;
+       }
+
+       /* Store the original file name if present. */
+       if (namelen != 0 && name != NULL) {
+               const char *ep = name + PATH_MAX - 1;
+               for (; namelen > 0; namelen--) {
+                       if ((c = get_byte(s)) == EOF)
+                               break;
+                       s->z_hlen++;
+                       if (c == '\0')
+                               break;
+                       if (name < ep)
+                               *name++ = c;
+               }
+               *name = '\0';
+       }
+
+       /* Parse extra fields, if any. */
+       while (extlen >= 4) {
+               uint16_t sig;
+               int fieldlen;
+
+               sig = get_uint16(s);
+               fieldlen = get_uint16(s);
+               s->z_hlen += 4;
+               extlen -= 4;
+
+               switch (sig) {
+               case EF_ZIP64:
+                       /* 64-bit file sizes */
+                       s->z_zip64 = 1;
+                       if (fieldlen >= 8) {
+                               s->z_ulen = get_uint64(s);
+                               s->z_hlen += 8;
+                               extlen -= 8;
+                               fieldlen -= 8;
+                       }
+                       break;
+               case EF_TIME:
+                       /* UTC timestamps */
+                       if ((c = get_byte(s)) == EOF)
+                               break;
+                       s->z_hlen++;
+                       extlen--;
+                       fieldlen--;
+                       if (c & 1) {
+                               got_mtime = 1;
+                               s->z_time = get_uint32(s);
+                               s->z_hlen += 4;
+                               extlen -= 4;
+                               fieldlen -= 4;
+                       }
+                       break;
+               case EF_IZUNIX:
+                       /* We prefer EF_TIME if it is present. */
+                       if (got_mtime)
+                               break;
+
+                       /* skip atime, store mtime. */
+                       (void)get_uint32(s);
+                       s->z_time = get_uint32(s);
+                       s->z_hlen += 8;
+                       extlen -= 8;
+                       fieldlen -= 8;
+                       break;
+               default:
+                       break;
+               }
+
+               /* Consume any unparsed bytes in the field. */
+               for (; fieldlen > 0; fieldlen--) {
+                       if (get_byte(s) == EOF)
+                               break;
+                       s->z_hlen++;
+                       extlen--;
+               }
+       }
+       for (; extlen > 0; extlen--) {
+               if (get_byte(s) == EOF)
+                       break;
+               s->z_hlen++;
+       }
+
+       return 0;
+}
+
+void *
+zip_ropen(int fd, char *name, int gotmagic)
+{
+       struct zip_state *s;
+
+       if (fd < 0)
+               return NULL;
+
+       if ((s = calloc(1, sizeof(*s))) == NULL)
+               return NULL;
+
+       s->z_fd = fd;
+       s->z_crc = crc32(0, NULL, 0);
+
+       /* Request a raw inflate, there is no zlib/gzip header present. */
+       if (inflateInit2(&s->z_stream, -MAX_WBITS) != Z_OK) {
+               free(s);
+               return NULL;
+       }
+       s->z_stream.next_in = s->z_buf;
+       s->z_stream.avail_out = sizeof(s->z_buf);
+
+       /* Read the zip header. */
+       if (get_header(s, name, gotmagic) != 0) {
+               zip_close(s, NULL, NULL, NULL);
+               s = NULL;
+       }
+
+       return s;
+}
+
+static int
+zip_store(struct zip_state *s)
+{
+       int error = Z_OK;
+       uLong copy_len;
+
+       if ((int)s->z_stream.avail_in <= 0)
+               return s->z_stream.avail_in == 0 ? Z_STREAM_END : Z_DATA_ERROR;
+
+       /* For stored files we rely on z_ulen being set. */
+       copy_len = MINIMUM(s->z_stream.avail_out, s->z_stream.avail_in);
+       if (copy_len >= s->z_ulen - s->z_total_out) {
+               /* Don't copy past the end of the file. */
+               copy_len = s->z_ulen - s->z_total_out;
+               error = Z_STREAM_END;
+       }
+
+       memcpy(s->z_stream.next_out, s->z_stream.next_in, copy_len);
+       s->z_stream.next_out += copy_len;
+       s->z_stream.avail_out -= copy_len;
+       s->z_stream.next_in += copy_len;
+       s->z_stream.avail_in -= copy_len;
+       s->z_total_in += copy_len;
+       s->z_total_out += copy_len;
+
+       return error;
+}
+
+int
+zip_read(void *cookie, char *buf, int len)
+{
+       struct zip_state *s = cookie;
+       Bytef *ubuf = buf;
+       int error = Z_OK;
+
+       s->z_stream.next_out = ubuf;
+       s->z_stream.avail_out = len;
+
+       while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
+               if (s->z_stream.avail_in == 0) {
+                       ssize_t nread = read(s->z_fd, s->z_buf, Z_BUFSIZE);
+                       switch (nread) {
+                       case -1:
+                               goto bad;
+                       case 0:
+                               s->z_eof = 1;
+                               continue;
+                       default:
+                               s->z_stream.avail_in = nread;
+                               s->z_stream.next_in = s->z_buf;
+                       }
+               }
+
+               if (s->z_method == Z_DEFLATED) {
+                       /*
+                        * Prevent overflow of z_stream.total_{in,out}
+                        * which may be 32-bit.
+                        */
+                       uLong prev_total_in = s->z_stream.total_in;
+                       uLong prev_total_out = s->z_stream.total_out;
+                       error = inflate(&s->z_stream, Z_NO_FLUSH);
+                       s->z_total_in += s->z_stream.total_in - prev_total_in;
+                       s->z_total_out += s->z_stream.total_out - prev_total_out;
+               } else {
+                       /* File stored uncompressed. */
+                       error = zip_store(s);
+               }
+
+               switch (error) {
+               case Z_DATA_ERROR:
+                       errno = EINVAL;
+                       goto bad;
+               case Z_BUF_ERROR:
+                       errno = EIO;
+                       goto bad;
+               case Z_STREAM_END:
+                       /*
+                        * Check CRC and original size.
+                        * These may be found in the local header or, if
+                        * EXTFLG is set, immediately following the file.
+                        */
+                       s->z_eof = 1;
+                       s->z_crc = crc32(s->z_crc, ubuf,
+                           (uInt)(s->z_stream.next_out - ubuf));
+
+                       if (s->z_flags & EXTFLG) {
+                               /*
+                                * Read data descriptor:
+                                *  signature 0x08074b50: 4 bytes
+                                *  CRC-32: 4 bytes
+                                *  compressed size: 4 or 8 bytes
+                                *  uncompressed size: 4 or 8 bytes
+                                */
+                               get_uint32(s);
+                               s->z_ocrc = get_uint32(s);
+                               if (s->z_zip64) {
+                                       get_uint64(s);
+                                       s->z_ulen = get_uint64(s);
+                                       s->z_hlen += 8;
+                               } else {
+                                       get_uint32(s);
+                                       s->z_ulen = get_uint32(s);
+                               }
+                               s->z_hlen += EXTHDR;
+                       }
+                       if (s->z_ulen != s->z_total_out) {
+                               errno = EIO;
+                               return -1;
+                       }
+                       if (s->z_ocrc != s->z_crc) {
+                               errno = EINVAL;
+                               goto bad;
+                       }
+                       break;
+               }
+       }
+       return len - s->z_stream.avail_out;
+bad:
+       return -1;
+}
+
+int
+zip_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
+{
+       struct zip_state *s = cookie;
+       int error = 0;
+
+       if (s == NULL) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (info != NULL) {
+               info->mtime = s->z_time;
+               info->crc = s->z_crc;
+               info->hlen = s->z_hlen;
+               info->total_in = s->z_total_in;
+               info->total_out = s->z_total_out;
+       }
+
+       if (s->z_stream.state != NULL) {
+               /* inflateEnd() overwrites errno. */
+               (void)inflateEnd(&s->z_stream);
+       }
+
+       /*
+        * Check for the presence of additional files in the .zip.
+        * Do not remove the original if we cannot extract all the files.
+        */
+       s->z_eof = 0;
+       if (get_header(s, NULL, 0) == 0) {
+               errno = EEXIST;
+               error = -1;
+       }
+
+       setfile(name, s->z_fd, sb);
+       (void)close(s->z_fd);
+
+       free(s);
+
+       return error;
+}

Reply via email to