Thanks for the patches, Mark! I installed them, added a regression test,
and adjusted the new code to match GNU style (which is what we do for
gzip nowadays). While I was at it I added static checks against bizarre
but standard-conforming values of EOF, something that has been bugging
me for a while.
For the record I'm attaching the patches involved, and I'm closing the
bug report.
oset: if you would like your name and email address in the THANKS file,
please let me know your name or nom de plume.
From c651ce70ae78af23343b5140c9700aa8de42808a Mon Sep 17 00:00:00 2001
From: Mark Adler <g...@madler.net>
Date: Sun, 15 Jun 2025 18:19:00 -0700
Subject: [PATCH 01/10] maint: replace tabs in the source code with spaces
---
deflate.c | 4 ++--
gzip.c | 2 +-
gzip.h | 12 ++++++------
inflate.c | 2 +-
lzw.h | 4 ++--
unlzw.c | 38 +++++++++++++++++++-------------------
util.c | 14 +++++++-------
7 files changed, 38 insertions(+), 38 deletions(-)
diff --git a/deflate.c b/deflate.c
index 8eb5f96..c0148e9 100644
--- a/deflate.c
+++ b/deflate.c
@@ -191,8 +191,8 @@ unsigned int near prev_length;
unsigned near strstart; /* start of string to insert */
unsigned near match_start; /* start of matching string */
-static int eofile; /* flag set at end of input file */
-static unsigned lookahead; /* number of valid bytes ahead in window */
+static int eofile; /* flag set at end of input file */
+static unsigned lookahead; /* number of valid bytes ahead in window */
unsigned max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this length.
diff --git a/gzip.c b/gzip.c
index 0231ffa..4df879e 100644
--- a/gzip.c
+++ b/gzip.c
@@ -207,7 +207,7 @@ static bool stdin_was_read;
off_t bytes_in; /* number of input bytes */
off_t bytes_out; /* number of output bytes */
static off_t total_in; /* input bytes for all files */
-static off_t total_out; /* output bytes for all files */
+static off_t total_out; /* output bytes for all files */
char ifname[MAX_PATH_LEN]; /* input file name */
char ofname[MAX_PATH_LEN]; /* output file name */
static char dfname[MAX_PATH_LEN]; /* name of dir containing output file */
diff --git a/gzip.h b/gzip.h
index 855a88b..5bd0f69 100644
--- a/gzip.h
+++ b/gzip.h
@@ -69,7 +69,7 @@ extern int method; /* compression method */
* is done in window except for unlzw.
*/
-#ifndef INBUFSIZ
+#ifndef INBUFSIZ
# ifdef SMALL_MEM
# define INBUFSIZ 0x2000 /* input buffer size */
# else
@@ -78,7 +78,7 @@ extern int method; /* compression method */
#endif
#define INBUF_EXTRA 64 /* required by unlzw() */
-#ifndef OUTBUFSIZ
+#ifndef OUTBUFSIZ
# ifdef SMALL_MEM
# define OUTBUFSIZ 8192 /* output buffer size */
# else
@@ -148,10 +148,10 @@ typedef int file_t; /* Do not use stdio */
#define NO_FILE (-1) /* in memory compression */
-#define PACK_MAGIC "\037\036" /* Magic header for packed files */
-#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */
-#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x */
-#define LZH_MAGIC "\037\240" /* Magic header for SCO LZH Compress files*/
+#define PACK_MAGIC "\037\036" /* Magic header for packed files */
+#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */
+#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x */
+#define LZH_MAGIC "\037\240" /* Magic header for SCO LZH Compress files*/
#define PKZIP_MAGIC "\120\113\003\004" /* Magic header for pkzip files */
/* gzip flag byte */
diff --git a/inflate.c b/inflate.c
index c0c84e8..c6b3105 100644
--- a/inflate.c
+++ b/inflate.c
@@ -799,7 +799,7 @@ inflate_dynamic(void)
return i; /* incomplete code set */
}
- if (tl == NULL) /* Grrrhhh */
+ if (tl == NULL) /* Grrrhhh */
return 2;
/* read in literal and distance code lengths */
diff --git a/lzw.h b/lzw.h
index 57f5388..0ea3080 100644
--- a/lzw.h
+++ b/lzw.h
@@ -21,7 +21,7 @@
#endif
#define INIT_BITS 9 /* Initial number of bits per code */
-#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */
+#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */
#define BIT_MASK 0x1f /* Mask for 'number of compression bits' */
/* Mask 0x20 is reserved to mean a fourth header byte, and 0x40 is free.
@@ -40,7 +40,7 @@
#define LZW_RESERVED 0x60 /* reserved bits */
-#define CLEAR 256 /* flush the dictionary */
+#define CLEAR 256 /* flush the dictionary */
#define FIRST (CLEAR+1) /* first free entry */
extern int maxbits; /* max bits per code for LZW */
diff --git a/unlzw.c b/unlzw.c
index ed77970..9be20d1 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -22,33 +22,33 @@ typedef unsigned long count_int;
typedef unsigned short count_short;
typedef unsigned long cmp_code_int;
-#define MAXCODE(n) (1L << (n))
+#define MAXCODE(n) (1L << (n))
-#ifndef BYTEORDER
-# define BYTEORDER 0000
+#ifndef BYTEORDER
+# define BYTEORDER 0000
#endif
-#ifndef NOALLIGN
-# define NOALLIGN 0
+#ifndef NOALLIGN
+# define NOALLIGN 0
#endif
-union bytes {
+union bytes {
long word;
struct {
#if BYTEORDER == 4321
- char_type b1;
- char_type b2;
- char_type b3;
- char_type b4;
+ char_type b1;
+ char_type b2;
+ char_type b3;
+ char_type b4;
#else
#if BYTEORDER == 1234
- char_type b4;
- char_type b3;
- char_type b2;
- char_type b1;
+ char_type b4;
+ char_type b3;
+ char_type b2;
+ char_type b1;
#else
-# undef BYTEORDER
+# undef BYTEORDER
int dummy;
#endif
#endif
@@ -72,13 +72,13 @@ union bytes {
#ifndef MAXSEG_64K
/* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
# define tab_prefixof(i) tab_prefix[i]
-# define clear_tab_prefixof() memzero(tab_prefix, 256);
+# define clear_tab_prefixof() memzero(tab_prefix, 256);
#else
/* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
/* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */
ush *tab_prefix[2];
# define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
-# define clear_tab_prefixof() \
+# define clear_tab_prefixof() \
memzero(tab_prefix0, 128), \
memzero(tab_prefix1, 128);
#endif
@@ -241,7 +241,7 @@ unlzw (int in, int out)
*--stackp = tab_suffixof(code);
code = tab_prefixof(code);
}
- *--stackp = (char_type)(finchar = tab_suffixof(code));
+ *--stackp = (char_type)(finchar = tab_suffixof(code));
/* And put them out in forward order */
{
@@ -273,7 +273,7 @@ unlzw (int in, int out)
tab_suffixof(code) = (char_type)finchar;
free_ent = code+1;
}
- oldcode = incode; /* Remember previous code. */
+ oldcode = incode; /* Remember previous code. */
}
} while (rsize != 0);
diff --git a/util.c b/util.c
index e3eb739..b35a995 100644
--- a/util.c
+++ b/util.c
@@ -300,7 +300,7 @@ make_simple_name (char *name)
If the vector would be empty, do not allocate storage,
do not set *ARGCP and *ARGVP, and return NULL. */
-#define SEPARATOR " \t" /* separators in env variable */
+#define SEPARATOR " \t" /* separators in env variable */
char *add_envopt(
int *argcp, /* pointer to argc */
@@ -319,11 +319,11 @@ char *add_envopt(
env_val = xstrdup (env_val);
for (p = env_val; *p; nargc++ ) { /* move through env_val */
- p += strspn(p, SEPARATOR); /* skip leading separators */
+ p += strspn(p, SEPARATOR); /* skip leading separators */
if (*p == '\0') break;
- p += strcspn(p, SEPARATOR); /* find end of word */
- if (*p) *p++ = '\0'; /* mark it */
+ p += strcspn(p, SEPARATOR); /* find end of word */
+ if (*p) *p++ = '\0'; /* mark it */
}
if (nargc == 0) {
free(env_val);
@@ -342,9 +342,9 @@ char *add_envopt(
/* Then copy the environment args */
for (p = env_val; nargc > 0; nargc--) {
- p += strspn(p, SEPARATOR); /* skip separators */
- *(nargv++) = p; /* store start */
- while (*p++) ; /* skip over word */
+ p += strspn(p, SEPARATOR); /* skip separators */
+ *(nargv++) = p; /* store start */
+ while (*p++) ; /* skip over word */
}
*nargv = NULL;
--
2.48.1
From 6264387434cceca23f6dcc9084c05e3c40a337a4 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 09:45:41 -0700
Subject: [PATCH 02/10] maint: use GNU style in lines just untabbed
---
deflate.c | 4 ++--
gzip.c | 2 +-
gzip.h | 8 ++++----
lzw.h | 4 ++--
unlzw.c | 50 +++++++++++++++++++++++++-------------------------
util.c | 16 +++++++++-------
6 files changed, 43 insertions(+), 41 deletions(-)
diff --git a/deflate.c b/deflate.c
index c0148e9..aad8723 100644
--- a/deflate.c
+++ b/deflate.c
@@ -191,8 +191,8 @@ unsigned int near prev_length;
unsigned near strstart; /* start of string to insert */
unsigned near match_start; /* start of matching string */
-static int eofile; /* flag set at end of input file */
-static unsigned lookahead; /* number of valid bytes ahead in window */
+static int eofile; /* Flag set at end of input file. */
+static unsigned lookahead; /* Number of valid bytes ahead in window. */
unsigned max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this length.
diff --git a/gzip.c b/gzip.c
index 4df879e..ab1aaf2 100644
--- a/gzip.c
+++ b/gzip.c
@@ -207,7 +207,7 @@ static bool stdin_was_read;
off_t bytes_in; /* number of input bytes */
off_t bytes_out; /* number of output bytes */
static off_t total_in; /* input bytes for all files */
-static off_t total_out; /* output bytes for all files */
+static off_t total_out; /* Output bytes for all files. */
char ifname[MAX_PATH_LEN]; /* input file name */
char ofname[MAX_PATH_LEN]; /* output file name */
static char dfname[MAX_PATH_LEN]; /* name of dir containing output file */
diff --git a/gzip.h b/gzip.h
index 5bd0f69..22ae30d 100644
--- a/gzip.h
+++ b/gzip.h
@@ -148,10 +148,10 @@ typedef int file_t; /* Do not use stdio */
#define NO_FILE (-1) /* in memory compression */
-#define PACK_MAGIC "\037\036" /* Magic header for packed files */
-#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */
-#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x */
-#define LZH_MAGIC "\037\240" /* Magic header for SCO LZH Compress files*/
+#define PACK_MAGIC "\037\036" /* Magic header for packed files. */
+#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B. */
+#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x. */
+#define LZH_MAGIC "\037\240" /* Magic header for SCO LZH Compress. */
#define PKZIP_MAGIC "\120\113\003\004" /* Magic header for pkzip files */
/* gzip flag byte */
diff --git a/lzw.h b/lzw.h
index 0ea3080..7801d28 100644
--- a/lzw.h
+++ b/lzw.h
@@ -21,7 +21,7 @@
#endif
#define INIT_BITS 9 /* Initial number of bits per code */
-#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */
+#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D. */
#define BIT_MASK 0x1f /* Mask for 'number of compression bits' */
/* Mask 0x20 is reserved to mean a fourth header byte, and 0x40 is free.
@@ -40,7 +40,7 @@
#define LZW_RESERVED 0x60 /* reserved bits */
-#define CLEAR 256 /* flush the dictionary */
+#define CLEAR 256 /* Flush the dictionary. */
#define FIRST (CLEAR+1) /* first free entry */
extern int maxbits; /* max bits per code for LZW */
diff --git a/unlzw.c b/unlzw.c
index 9be20d1..a0dab13 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -22,40 +22,40 @@ typedef unsigned long count_int;
typedef unsigned short count_short;
typedef unsigned long cmp_code_int;
-#define MAXCODE(n) (1L << (n))
+#define MAXCODE(n) (1L << (n))
#ifndef BYTEORDER
-# define BYTEORDER 0000
+# define BYTEORDER 0000
#endif
-#ifndef NOALLIGN
-# define NOALLIGN 0
+#ifndef NOALIGN
+# define NOALIGN 0
#endif
-union bytes {
- long word;
- struct {
+union bytes
+{
+ long int word;
+ struct
+ {
#if BYTEORDER == 4321
- char_type b1;
- char_type b2;
- char_type b3;
- char_type b4;
-#else
-#if BYTEORDER == 1234
- char_type b4;
- char_type b3;
- char_type b2;
- char_type b1;
+ char_type b1;
+ char_type b2;
+ char_type b3;
+ char_type b4;
+#elif BYTEORDER == 1234
+ char_type b4;
+ char_type b3;
+ char_type b2;
+ char_type b1;
#else
-# undef BYTEORDER
- int dummy;
-#endif
+# undef BYTEORDER
+ int dummy;
#endif
- } bytes;
+ } bytes;
};
-#if BYTEORDER == 4321 && NOALLIGN == 1
+#if BYTEORDER == 4321 && NOALIGN == 1
# define input(b,o,c,n,m){ \
(c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
(o) += (n); \
@@ -72,13 +72,13 @@ union bytes {
#ifndef MAXSEG_64K
/* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
# define tab_prefixof(i) tab_prefix[i]
-# define clear_tab_prefixof() memzero(tab_prefix, 256);
+# define clear_tab_prefixof() memzero (tab_prefix, 256);
#else
/* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
/* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */
ush *tab_prefix[2];
# define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
-# define clear_tab_prefixof() \
+# define clear_tab_prefixof() \
memzero(tab_prefix0, 128), \
memzero(tab_prefix1, 128);
#endif
@@ -241,7 +241,7 @@ unlzw (int in, int out)
*--stackp = tab_suffixof(code);
code = tab_prefixof(code);
}
- *--stackp = (char_type)(finchar = tab_suffixof(code));
+ *--stackp = (char_type) (finchar = tab_suffixof (code));
/* And put them out in forward order */
{
diff --git a/util.c b/util.c
index b35a995..1496e43 100644
--- a/util.c
+++ b/util.c
@@ -300,7 +300,7 @@ make_simple_name (char *name)
If the vector would be empty, do not allocate storage,
do not set *ARGCP and *ARGVP, and return NULL. */
-#define SEPARATOR " \t" /* separators in env variable */
+#define SEPARATOR " \t" /* Separators in environment variable. */
char *add_envopt(
int *argcp, /* pointer to argc */
@@ -319,11 +319,12 @@ char *add_envopt(
env_val = xstrdup (env_val);
for (p = env_val; *p; nargc++ ) { /* move through env_val */
- p += strspn(p, SEPARATOR); /* skip leading separators */
+ p += strspn (p, SEPARATOR); /* Skip leading separators. */
if (*p == '\0') break;
- p += strcspn(p, SEPARATOR); /* find end of word */
- if (*p) *p++ = '\0'; /* mark it */
+ p += strcspn (p, SEPARATOR); /* Find end of word. */
+ if (*p) /* Mark it if found. */
+ *p++ = '\0';
}
if (nargc == 0) {
free(env_val);
@@ -342,9 +343,10 @@ char *add_envopt(
/* Then copy the environment args */
for (p = env_val; nargc > 0; nargc--) {
- p += strspn(p, SEPARATOR); /* skip separators */
- *(nargv++) = p; /* store start */
- while (*p++) ; /* skip over word */
+ p += strspn (p, SEPARATOR); /* skip separators */
+ *nargv++ = p; /* store start */
+ while (*p++) /* skip over word */
+ continue;
}
*nargv = NULL;
--
2.48.1
From 886d2c6965ffc2bfc4d8700e56d82ff23620930c Mon Sep 17 00:00:00 2001
From: Mark Adler <g...@madler.net>
Date: Mon, 16 Jun 2025 10:13:06 -0700
Subject: [PATCH 03/10] zip: correctly handle PKZIP signatures and the local
header
This fixes bugs in the lack of detection of spanning signatures,
lack of detection of empty zip files, checking the compression
method, failure to skip over the file name and extra field in the
SMALL_MEM case, failure to get the CRC and uncompressed length in
the SMALL_MEM case, failure to check for invalid combinations of
header information, the lack of detection of a Zip64 entry, and
a possible decompression of a zip entry after a gzip member.
* gzip.c (get_method): New arg FIRST. All callers changed.
* gzip.h (PKZIP_MAGIC): Now just the two-byte magic header.
* unzip.c (SPNSIG, ONESIG, Z64SIG, ENDSIG): New macros.
(RAND_HEAD_LEN, decrypt): Remove. All uses removed.
(orig_crc, orig_len): New static vars.
(check_zipfile, unzip): Handle PKZIP signatures and the local header.
---
gzip.c | 17 ++++----
gzip.h | 2 +-
unzip.c | 121 ++++++++++++++++++++++++++++++++++----------------------
3 files changed, 83 insertions(+), 57 deletions(-)
diff --git a/gzip.c b/gzip.c
index ab1aaf2..c172449 100644
--- a/gzip.c
+++ b/gzip.c
@@ -302,7 +302,7 @@ static int open_input_file (char *iname, struct stat *sbuf);
static void discard_input_bytes (size_t nbytes, unsigned int flags);
static int make_ofname (void);
static void shorten_name (char *name);
-static int get_method (int in);
+static int get_method (int in, int first);
static void do_list (int method);
static int check_ofname (void);
static void copy_stat (struct stat *ifstat);
@@ -734,7 +734,7 @@ treat_stdin ()
stdin_was_read = true;
if (decompress) {
- method = get_method(ifd);
+ method = get_method(ifd, 1);
if (method < 0) {
do_exit(exit_code); /* error message already emitted */
}
@@ -749,7 +749,7 @@ treat_stdin ()
if (input_eof ())
break;
- method = get_method(ifd);
+ method = get_method(ifd, 0);
if (method < 0) return; /* error message already emitted */
}
@@ -915,7 +915,7 @@ treat_file (char *iname)
part_nb = 0;
if (decompress) {
- method = get_method(ifd); /* updates ofname if original given */
+ method = get_method(ifd, 1); /* updates ofname if original given */
if (method < 0) {
close(ifd);
return; /* error message already emitted */
@@ -955,7 +955,7 @@ treat_file (char *iname)
if (input_eof ())
break;
- method = get_method(ifd);
+ method = get_method(ifd, 0);
if (method < 0) break; /* error message already emitted */
}
@@ -1440,12 +1440,12 @@ discard_input_bytes (size_t nbytes, unsigned int flags)
* Updates time_stamp if there is one and neither -m nor -n is used.
* This function may be called repeatedly for an input file consisting
* of several contiguous gzip'ed members.
- * 'in' is the input file descriptor.
+ * 'in' is the input file descriptor, 'first' is true for first call on in
* IN assertions: there is at least one remaining compressed member.
* If the member is a zip file, it must be the only one.
*/
static int
-get_method (int in)
+get_method (int in, int first)
{
uch flags; /* compression flags */
uch magic[10]; /* magic header */
@@ -1609,8 +1609,7 @@ get_method (int in)
header_bytes = inptr + 2*4; /* include crc and size */
}
- } else if (4 <= insize
- && memcmp((char*)inbuf, PKZIP_MAGIC, 4) == 0) {
+ } else if (first && memcmp(magic, PKZIP_MAGIC, 2) == 0) {
/* To simplify the code, we support a zip file when alone only.
* We are thus guaranteed that the entire local header fits in inbuf.
*/
diff --git a/gzip.h b/gzip.h
index 22ae30d..e8eca4f 100644
--- a/gzip.h
+++ b/gzip.h
@@ -152,7 +152,7 @@ typedef int file_t; /* Do not use stdio */
#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B. */
#define OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x. */
#define LZH_MAGIC "\037\240" /* Magic header for SCO LZH Compress. */
-#define PKZIP_MAGIC "\120\113\003\004" /* Magic header for pkzip files */
+#define PKZIP_MAGIC "\120\113" /* Magic header for pkzip files. */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
diff --git a/unzip.c b/unzip.c
index 1bd9ca7..8dfb454 100644
--- a/unzip.c
+++ b/unzip.c
@@ -32,12 +32,16 @@
#include "gzip.h"
/* PKZIP header definitions */
-#define LOCSIG 0x04034b50L /* four-byte lead-in (lsb first) */
+#define SPNSIG 0x08074b50L /* spanned zip file segment marker */
+#define ONESIG 0x30304b50L /* was spanned but re-marked as one segment */
+#define Z64SIG 0x06064b50L /* zip64 end record */
+#define ENDSIG 0x06054b50L /* end record */
+#define LOCSIG 0x04034b50L /* local header */
#define LOCFLG 6 /* offset of bit flag */
#define CRPFLG 1 /* bit for encrypted entry */
#define EXTFLG 8 /* bit for extended local header */
#define LOCHOW 8 /* offset of compression method */
-/* #define LOCTIM 10 UNUSED file mod time (for decryption) */
+/* #define LOCTIM 10 UNUSED file mod time */
#define LOCCRC 14 /* offset of crc */
#define LOCSIZ 18 /* offset of compressed size */
#define LOCLEN 22 /* offset of uncompressed length */
@@ -45,16 +49,16 @@
#define LOCEXT 28 /* offset of extra field length */
#define LOCHDR 30 /* size of local header, including sig */
#define EXTHDR 16 /* size of extended local header, inc sig */
-#define RAND_HEAD_LEN 12 /* length of encryption random header */
/* Globals */
ulg unzip_crc; /* CRC found by 'unzip'. */
-static int decrypt; /* flag to turn on decryption */
static int pkzip = 0; /* set for a pkzip file */
static int ext_header = 0; /* set if extended local header */
+static ulg orig_crc; /* CRC from gzip trailer or zip local header */
+static ulg orig_len; /* uncompressed length from trailer or header */
/* ===========================================================================
* Check zip file and advance inptr to the start of the compressed data.
@@ -64,43 +68,79 @@ static int ext_header = 0; /* set if extended local header */
int
check_zipfile (int in)
{
- uch *h = inbuf + inptr; /* first local header */
-
+ ulg sig, skip;
+ uch *h = inbuf + inptr;
+ const char *bad = "not a valid zip file";
ifd = in;
+ do {
+ if (insize - inptr < 4)
+ break;
+ sig = LG(h);
+ if (sig == SPNSIG || sig == ONESIG) {
+ /* skip spanning signature */
+ h += 4;
+ inptr += 4;
+ if (insize - inptr < 4)
+ break;
+ sig = LG(h);
+ }
+ if (sig == ENDSIG || sig == Z64SIG) {
+ /* empty zip file */
+ bad = "is an empty zip file";
+ break;
+ }
+ if (sig != LOCSIG || insize - inptr < LOCHDR)
+ /* not a local header */
+ break;
+ inptr += LOCHDR;
+
+ /* Get compression method */
+ method = SH(h + LOCHOW);
+ if (method != STORED && method != DEFLATED) {
+ bad = "first entry not deflated or stored -- use unzip";
+ break;
+ }
- /* Check validity of local header, and skip name and extra fields */
- inptr += LOCHDR;
- if (inptr <= insize)
- inptr += SH(h + LOCFIL) + SH(h + LOCEXT);
+ /* Check for encryption */
+ if ((h[LOCFLG] & CRPFLG) != 0) {
+ bad = "encrypted file -- use unzip";
+ break;
+ }
- if (inptr > insize || LG(h) != LOCSIG) {
- fprintf(stderr, "\n%s: %s: not a valid zip file\n",
- program_name, ifname);
- exit_code = ERROR;
- return ERROR;
- }
- method = h[LOCHOW];
- if (method != STORED && method != DEFLATED) {
- fprintf(stderr,
- "\n%s: %s: first entry not deflated or stored -- use unzip\n",
- program_name, ifname);
- exit_code = ERROR;
- return ERROR;
- }
+ /* Save header information for unzip() */
+ ext_header = (h[LOCFLG] & EXTFLG) != 0;
+ orig_crc = LG(h + LOCCRC);
+ orig_len = LG(h + LOCLEN);
+ if (method == STORED && (ext_header || orig_len != LG(h + LOCSIZ)))
+ break;
+ if (!ext_header && orig_len == 0xffffffff) {
+ bad = "Zip64 entry -- not supported, use unzip";
+ break;
+ }
- /* If entry encrypted, decrypt and validate encryption header */
- if ((decrypt = h[LOCFLG] & CRPFLG) != 0) {
- fprintf(stderr, "\n%s: %s: encrypted file -- use unzip\n",
- program_name, ifname);
+ /* Get ofname and timestamp from local header (to be done) */
+
+ /* Skip over the file name and extra field (need to loop for the
+ SMALL_MEM case) */
+ skip = (ulg)SH(h + LOCFIL) + (ulg)SH(h + LOCEXT);
+ while (skip > insize - inptr) {
+ skip -= insize - inptr;
+ fill_inbuf(0); /* will error out on no more input */
+ inptr = 0;
+ }
+ inptr += skip;
+
+ /* Good local header */
+ pkzip = 1;
+ bad = NULL;
+ } while (0);
+
+ if (bad != NULL) {
+ fprintf(stderr, "\n%s: %s: %s\n", program_name, ifname, bad);
exit_code = ERROR;
return ERROR;
}
- /* Save flags for unzip() */
- ext_header = (h[LOCFLG] & EXTFLG) != 0;
- pkzip = 1;
-
- /* Get ofname and timestamp from local header (to be done) */
return OK;
}
@@ -116,8 +156,6 @@ check_zipfile (int in)
int
unzip (int in, int out)
{
- ulg orig_crc = 0; /* original crc */
- ulg orig_len = 0; /* original uncompressed length */
off_t orig_bytes_out = bytes_out;
int n;
uch buf[EXTHDR]; /* extended local header */
@@ -128,11 +166,6 @@ unzip (int in, int out)
updcrc(NULL, 0); /* initialize crc */
- if (pkzip && !ext_header) { /* crc and length at the end otherwise */
- orig_crc = LG(inbuf + LOCCRC);
- orig_len = LG(inbuf + LOCLEN);
- }
-
/* Decompress */
if (method == DEFLATED) {
@@ -150,13 +183,7 @@ unzip (int in, int out)
} else if (pkzip && method == STORED) {
- register ulg n = LG(inbuf + LOCLEN);
-
- if (n != LG(inbuf + LOCSIZ) - (decrypt ? RAND_HEAD_LEN : 0)) {
-
- fprintf(stderr, "len %lu, siz %lu\n", n, LG(inbuf + LOCSIZ));
- gzip_error ("invalid compressed data--length mismatch");
- }
+ register ulg n = orig_len;
while (n--) {
uch c = (uch)get_byte();
put_ubyte(c);
--
2.48.1
From 29cad37003dd5f78d3e869c8931103a34ec9231b Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 11:21:31 -0700
Subject: [PATCH 04/10] gzip: get_method 2nd arg is bool
* gzip.c (get_method): Use bool for boolean.
---
gzip.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/gzip.c b/gzip.c
index c172449..43146c2 100644
--- a/gzip.c
+++ b/gzip.c
@@ -302,7 +302,7 @@ static int open_input_file (char *iname, struct stat *sbuf);
static void discard_input_bytes (size_t nbytes, unsigned int flags);
static int make_ofname (void);
static void shorten_name (char *name);
-static int get_method (int in, int first);
+static int get_method (int in, bool first);
static void do_list (int method);
static int check_ofname (void);
static void copy_stat (struct stat *ifstat);
@@ -734,7 +734,7 @@ treat_stdin ()
stdin_was_read = true;
if (decompress) {
- method = get_method(ifd, 1);
+ method = get_method (ifd, true);
if (method < 0) {
do_exit(exit_code); /* error message already emitted */
}
@@ -749,7 +749,7 @@ treat_stdin ()
if (input_eof ())
break;
- method = get_method(ifd, 0);
+ method = get_method (ifd, false);
if (method < 0) return; /* error message already emitted */
}
@@ -915,7 +915,7 @@ treat_file (char *iname)
part_nb = 0;
if (decompress) {
- method = get_method(ifd, 1); /* updates ofname if original given */
+ method = get_method (ifd, true); /* Update ofname if original given. */
if (method < 0) {
close(ifd);
return; /* error message already emitted */
@@ -955,7 +955,7 @@ treat_file (char *iname)
if (input_eof ())
break;
- method = get_method(ifd, 0);
+ method = get_method (ifd, false);
if (method < 0) break; /* error message already emitted */
}
@@ -1440,12 +1440,12 @@ discard_input_bytes (size_t nbytes, unsigned int flags)
* Updates time_stamp if there is one and neither -m nor -n is used.
* This function may be called repeatedly for an input file consisting
* of several contiguous gzip'ed members.
- * 'in' is the input file descriptor, 'first' is true for first call on in
+ * 'in' is the input file descriptor; 'first' is true for first call on 'in'.
* IN assertions: there is at least one remaining compressed member.
* If the member is a zip file, it must be the only one.
*/
static int
-get_method (int in, int first)
+get_method (int in, bool first)
{
uch flags; /* compression flags */
uch magic[10]; /* magic header */
@@ -1609,7 +1609,7 @@ get_method (int in, int first)
header_bytes = inptr + 2*4; /* include crc and size */
}
- } else if (first && memcmp(magic, PKZIP_MAGIC, 2) == 0) {
+ } else if (first && memcmp (magic, PKZIP_MAGIC, 2) == 0) {
/* To simplify the code, we support a zip file when alone only.
* We are thus guaranteed that the entire local header fits in inbuf.
*/
--
2.48.1
From 575250263c81801486ecf1c64dd0a6b239768d79 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 11:24:31 -0700
Subject: [PATCH 05/10] unzip: refactor loop that never loops
* unzip.c (bad_zipfile): New static function,
with most of the old contents of check_zipfile.
This is clearer than the trick of having a loop that never loops.
(check_zipfile): Use it.
---
unzip.c | 135 ++++++++++++++++++++++++++++----------------------------
1 file changed, 67 insertions(+), 68 deletions(-)
diff --git a/unzip.c b/unzip.c
index 8dfb454..4baa8ef 100644
--- a/unzip.c
+++ b/unzip.c
@@ -60,6 +60,68 @@ static int ext_header = 0; /* set if extended local header */
static ulg orig_crc; /* CRC from gzip trailer or zip local header */
static ulg orig_len; /* uncompressed length from trailer or header */
+/* Check zip file and advance inptr to the start of the compressed data.
+ Return a null pointer on success, a diagnostic string otherwise. */
+static char const *
+bad_zipfile (void)
+{
+ static char const bad[] = "not a valid zip file";
+ if (insize - inptr < 4)
+ return bad;
+ uch *h = inbuf + inptr;
+ ulg sig = LG (h);
+ if (sig == SPNSIG || sig == ONESIG)
+ {
+ /* skip spanning signature */
+ h += 4;
+ inptr += 4;
+ if (insize - inptr < 4)
+ return bad;
+ sig = LG (h);
+ }
+ if (sig == ENDSIG || sig == Z64SIG)
+ return "is an empty zip file";
+ if (sig != LOCSIG || insize - inptr < LOCHDR)
+ /* not a local header */
+ return bad;
+ inptr += LOCHDR;
+
+ /* Get compression method */
+ method = SH (h + LOCHOW);
+ if (method != STORED && method != DEFLATED)
+ return "first entry not deflated or stored -- use unzip";
+
+ /* Check for encryption */
+ if ((h[LOCFLG] & CRPFLG) != 0)
+ return "encrypted file -- use unzip";
+
+ /* Save header information for unzip() */
+ ext_header = (h[LOCFLG] & EXTFLG) != 0;
+ orig_crc = LG (h + LOCCRC);
+ orig_len = LG (h + LOCLEN);
+ if (method == STORED && (ext_header || orig_len != LG (h + LOCSIZ)))
+ return bad;
+ if (!ext_header && orig_len == 0xffffffff)
+ return "Zip64 entry -- not supported, use unzip";
+
+ /* Get ofname and timestamp from local header (to be done) */
+
+ /* Skip over the file name and extra field (need to loop for the
+ SMALL_MEM case) */
+ ulg skip = (ulg) SH (h + LOCFIL) + (ulg) SH (h + LOCEXT);
+ while (insize - inptr < skip)
+ {
+ skip -= insize - inptr;
+ fill_inbuf (0); /* Will error out on no more input. */
+ inptr = 0;
+ }
+ inptr += skip;
+
+ /* Good local header */
+ pkzip = 1;
+ return NULL;
+}
+
/* ===========================================================================
* Check zip file and advance inptr to the start of the compressed data.
* Get ofname from the local header if necessary.
@@ -68,78 +130,15 @@ static ulg orig_len; /* uncompressed length from trailer or header */
int
check_zipfile (int in)
{
- ulg sig, skip;
- uch *h = inbuf + inptr;
- const char *bad = "not a valid zip file";
ifd = in;
- do {
- if (insize - inptr < 4)
- break;
- sig = LG(h);
- if (sig == SPNSIG || sig == ONESIG) {
- /* skip spanning signature */
- h += 4;
- inptr += 4;
- if (insize - inptr < 4)
- break;
- sig = LG(h);
- }
- if (sig == ENDSIG || sig == Z64SIG) {
- /* empty zip file */
- bad = "is an empty zip file";
- break;
- }
- if (sig != LOCSIG || insize - inptr < LOCHDR)
- /* not a local header */
- break;
- inptr += LOCHDR;
-
- /* Get compression method */
- method = SH(h + LOCHOW);
- if (method != STORED && method != DEFLATED) {
- bad = "first entry not deflated or stored -- use unzip";
- break;
- }
+ char const *bad = bad_zipfile ();
- /* Check for encryption */
- if ((h[LOCFLG] & CRPFLG) != 0) {
- bad = "encrypted file -- use unzip";
- break;
- }
-
- /* Save header information for unzip() */
- ext_header = (h[LOCFLG] & EXTFLG) != 0;
- orig_crc = LG(h + LOCCRC);
- orig_len = LG(h + LOCLEN);
- if (method == STORED && (ext_header || orig_len != LG(h + LOCSIZ)))
- break;
- if (!ext_header && orig_len == 0xffffffff) {
- bad = "Zip64 entry -- not supported, use unzip";
- break;
- }
-
- /* Get ofname and timestamp from local header (to be done) */
-
- /* Skip over the file name and extra field (need to loop for the
- SMALL_MEM case) */
- skip = (ulg)SH(h + LOCFIL) + (ulg)SH(h + LOCEXT);
- while (skip > insize - inptr) {
- skip -= insize - inptr;
- fill_inbuf(0); /* will error out on no more input */
- inptr = 0;
- }
- inptr += skip;
-
- /* Good local header */
- pkzip = 1;
- bad = NULL;
- } while (0);
-
- if (bad != NULL) {
- fprintf(stderr, "\n%s: %s: %s\n", program_name, ifname, bad);
+ if (bad)
+ {
+ fprintf (stderr, "\n%s: %s: %s\n", program_name, ifname, bad);
exit_code = ERROR;
return ERROR;
- }
+ }
return OK;
}
--
2.48.1
From a1e8e0b981ec48c2a2ac2756810c487a0fa31e1b Mon Sep 17 00:00:00 2001
From: Mark Adler <g...@madler.net>
Date: Mon, 16 Jun 2025 00:56:08 -0700
Subject: [PATCH 06/10] zip: correctly handle PKZIP data descriptors
* unzip.c (DATSIG, L8): New macros.
(unzip): Previously only one of the four possible data descriptors was
handled. Check for all four when validating the uncompressed
data with the CRC and length in the data descriptor. This also now
checks the full eight-byte uncompressed length for zip files.
---
unzip.c | 138 +++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 97 insertions(+), 41 deletions(-)
diff --git a/unzip.c b/unzip.c
index 4baa8ef..47083f9 100644
--- a/unzip.c
+++ b/unzip.c
@@ -36,6 +36,7 @@
#define ONESIG 0x30304b50L /* was spanned but re-marked as one segment */
#define Z64SIG 0x06064b50L /* zip64 end record */
#define ENDSIG 0x06054b50L /* end record */
+#define DATSIG 0x08074b50L /* data descriptor (yes, same as span sig) */
#define LOCSIG 0x04034b50L /* local header */
#define LOCFLG 6 /* offset of bit flag */
#define CRPFLG 1 /* bit for encrypted entry */
@@ -48,7 +49,10 @@
#define LOCFIL 26 /* offset of file name field length */
#define LOCEXT 28 /* offset of extra field length */
#define LOCHDR 30 /* size of local header, including sig */
-#define EXTHDR 16 /* size of extended local header, inc sig */
+#define EXTHDR 24 /* maximum size of data descriptor, inc sig */
+
+/* Macro for getting an eight-byte header value */
+#define L8(p) ((off_t)LG(p) | ((off_t)LG((p)+4) << 32))
/* Globals */
@@ -155,8 +159,11 @@ check_zipfile (int in)
int
unzip (int in, int out)
{
+ ulg crc; /* data crc */
+ off_t len; /* data uncompressed length */
off_t orig_bytes_out = bytes_out;
- int n;
+ int n; /* number of bytes in buf */
+ int d, i;
uch buf[EXTHDR]; /* extended local header */
int err = OK;
@@ -192,56 +199,105 @@ unzip (int in, int out)
gzip_error ("internal error, invalid method");
}
- /* Get the crc and original length */
- if (!pkzip) {
- /* crc32 (see algorithm.doc)
- * uncompressed input size modulo 2^32
- */
- for (n = 0; n < 8; n++) {
- buf[n] = (uch)get_byte(); /* may cause an error if EOF */
- }
- orig_crc = LG(buf);
- orig_len = LG(buf+4);
-
- } else if (ext_header) { /* If extended header, check it */
- /* signature - 4bytes: 0x50 0x4b 0x07 0x08
- * CRC-32 value
- * compressed size 4-bytes
- * uncompressed size 4-bytes
+ /* Uncompressed data CRC and length */
+ crc = getcrc();
+ len = bytes_out - orig_bytes_out;
+
+ if (ext_header) {
+ /* Validate the zip data descriptor */
+
+ /* optional signature - 0 bytes or 4 bytes: 0x50 0x4b 0x07 0x08
+ * CRC-32 value - 4 bytes
+ * compressed size - 4 bytes or 8 bytes
+ * uncompressed size - length same as compressed size (4 or 8)
*/
for (n = 0; n < EXTHDR; n++) {
- buf[n] = (uch)get_byte(); /* may cause an error if EOF */
+ int i = try_byte();
+ if (i == EOF)
+ break;
+ buf[n] = (uch)i;
}
- orig_crc = LG(buf+4);
- orig_len = LG(buf+12);
- }
- /* Validate decompression */
- if (orig_crc != updcrc(outbuf, 0)) {
- fprintf(stderr, "\n%s: %s: invalid compressed data--crc error\n",
- program_name, ifname);
- err = ERROR;
- }
- if (orig_len != (ulg)((bytes_out - orig_bytes_out) & 0xffffffff)) {
- fprintf(stderr, "\n%s: %s: invalid compressed data--length error\n",
- program_name, ifname);
- err = ERROR;
+ /* Try all four possibilities, from longest to shortest -- set d to
+ the length of the valid data descriptor, or 0 if invalid */
+ d = n >= 24 && LG(buf) == DATSIG && LG(buf + 4) == crc &&
+ L8(buf + 16) == len ? 24 :
+ n >= 20 && LG(buf) == crc && L8(buf + 12) == len ? 20 :
+ n >= 16 && LG(buf) == DATSIG && LG(buf + 4) == crc &&
+ (off_t)LG(buf + 12) == len ? 16 :
+ n >= 12 && LG(buf) == crc && (off_t)LG(buf + 8) == len ? 12 :
+ /* else */ 0;
+
+ if (d) {
+ /* Move the data after the descriptor to the start of buf[] */
+ i = 0;
+ while (d < n)
+ buf[i++] = buf[d++];
+ n = i;
+ }
+ else {
+ /* Invalid CRC or length */
+ fprintf(stderr,
+ "\n%s: %s: invalid compressed data--crc/length error\n",
+ program_name, ifname);
+ err = ERROR;
+ }
}
- /* Check if there are more entries in a pkzip file */
- if (pkzip && inptr + 4 < insize && LG(inbuf+inptr) == LOCSIG) {
- if (to_stdout) {
- WARN((stderr,
- "%s: %s has more than one entry--rest ignored\n",
- program_name, ifname));
- } else {
- /* Don't destroy the input zip file */
+ else {
+ /* Check the crc and length in the gzip trailer or zip local header */
+ if (!pkzip) {
+ /* crc32 (see algorithm.doc)
+ * uncompressed input size modulo 2^32
+ */
+ for (n = 0; n < 8; n++) {
+ buf[n] = (uch)get_byte(); /* may cause an error if EOF */
+ }
+ orig_crc = LG(buf);
+ orig_len = LG(buf+4);
+ len &= 0xffffffff; /* compare modulo 2^32 */
+ }
+ n = 0;
+
+ /* Validate decompression */
+ if (orig_crc != crc) {
+ fprintf(stderr,
+ "\n%s: %s: invalid compressed data--crc error\n",
+ program_name, ifname);
+ err = ERROR;
+ }
+ if ((off_t)orig_len != len) {
fprintf(stderr,
- "%s: %s has more than one entry -- unchanged\n",
+ "\n%s: %s: invalid compressed data--length error\n",
program_name, ifname);
err = ERROR;
}
}
+
+ /* Check if there are more entries in a pkzip file */
+ if (pkzip) {
+ while (n < 4) {
+ i = try_byte();
+ if (i == EOF)
+ break;
+ buf[n++] = (uch)i;
+ }
+ if (n >= 4 && LG(buf) == LOCSIG) {
+ /* There are more entries */
+ if (to_stdout) {
+ WARN((stderr,
+ "%s: %s has more than one entry--rest ignored\n",
+ program_name, ifname));
+ } else {
+ /* Don't destroy the input zip file */
+ fprintf(stderr,
+ "%s: %s has more than one entry -- unchanged\n",
+ program_name, ifname);
+ err = ERROR;
+ }
+ }
+ }
+
ext_header = pkzip = 0; /* for next file */
unzip_crc = orig_crc;
if (err == OK) return OK;
--
2.48.1
From fd5b3c6b20e935688010e5ec96e60d55e8e92e2a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 12:14:21 -0700
Subject: [PATCH 07/10] unzip: use GNU style in newly-added code
* unzip.c (unzip): Use GNU style.
---
unzip.c | 174 +++++++++++++++++++++++++++++---------------------------
1 file changed, 90 insertions(+), 84 deletions(-)
diff --git a/unzip.c b/unzip.c
index 47083f9..b45110c 100644
--- a/unzip.c
+++ b/unzip.c
@@ -49,10 +49,10 @@
#define LOCFIL 26 /* offset of file name field length */
#define LOCEXT 28 /* offset of extra field length */
#define LOCHDR 30 /* size of local header, including sig */
-#define EXTHDR 24 /* maximum size of data descriptor, inc sig */
+#define EXTHDR 24 /* Max size of data descriptor, incl. sig. */
/* Macro for getting an eight-byte header value */
-#define L8(p) ((off_t)LG(p) | ((off_t)LG((p)+4) << 32))
+#define L8(p) ((off_t) LG (p) | ((off_t) LG ((p) + 4) << 32))
/* Globals */
@@ -159,11 +159,8 @@ check_zipfile (int in)
int
unzip (int in, int out)
{
- ulg crc; /* data crc */
- off_t len; /* data uncompressed length */
off_t orig_bytes_out = bytes_out;
- int n; /* number of bytes in buf */
- int d, i;
+ int n; /* Number of bytes in buf. */
uch buf[EXTHDR]; /* extended local header */
int err = OK;
@@ -199,104 +196,113 @@ unzip (int in, int out)
gzip_error ("internal error, invalid method");
}
- /* Uncompressed data CRC and length */
- crc = getcrc();
- len = bytes_out - orig_bytes_out;
+ /* Uncompressed data CRC and length. */
+ ulg crc = getcrc ();
+ off_t len = bytes_out - orig_bytes_out;
- if (ext_header) {
- /* Validate the zip data descriptor */
-
- /* optional signature - 0 bytes or 4 bytes: 0x50 0x4b 0x07 0x08
- * CRC-32 value - 4 bytes
- * compressed size - 4 bytes or 8 bytes
- * uncompressed size - length same as compressed size (4 or 8)
- */
+ if (ext_header)
+ {
+ /* Validate the zip data descriptor, as follows:
+ - optional signature - 0 bytes or 4 bytes: 0x50 0x4b 0x07 0x08
+ - CRC-32 value - 4 bytes
+ - compressed size - 4 bytes or 8 bytes
+ - uncompressed size - length same as compressed size (4 or 8)
+ */
for (n = 0; n < EXTHDR; n++) {
- int i = try_byte();
+ int i = try_byte ();
if (i == EOF)
- break;
- buf[n] = (uch)i;
+ break;
+ buf[n] = (uch) i;
}
- /* Try all four possibilities, from longest to shortest -- set d to
- the length of the valid data descriptor, or 0 if invalid */
- d = n >= 24 && LG(buf) == DATSIG && LG(buf + 4) == crc &&
- L8(buf + 16) == len ? 24 :
- n >= 20 && LG(buf) == crc && L8(buf + 12) == len ? 20 :
- n >= 16 && LG(buf) == DATSIG && LG(buf + 4) == crc &&
- (off_t)LG(buf + 12) == len ? 16 :
- n >= 12 && LG(buf) == crc && (off_t)LG(buf + 8) == len ? 12 :
- /* else */ 0;
-
- if (d) {
- /* Move the data after the descriptor to the start of buf[] */
- i = 0;
+ /* Try all four possibilities, from longest to shortest.
+ D is the length of the valid data descriptor, or 0 if invalid. */
+ int d = ((24 <= n && LG (buf) == DATSIG && LG (buf + 4) == crc
+ && L8 (buf + 16) == len)
+ ? 24
+ : 20 <= n && LG (buf) == crc && L8 (buf + 12) == len
+ ? 20
+ : (16 <= n && LG (buf) == DATSIG && LG (buf + 4) == crc
+ && (off_t) LG (buf + 12) == len)
+ ? 16
+ : 12 <= n && LG (buf) == crc && (off_t) LG (buf + 8) == len
+ ? 12
+ : 0);
+ if (d)
+ {
+ /* Move the data after the descriptor to the start of buf. */
+ int i = 0;
while (d < n)
buf[i++] = buf[d++];
n = i;
- }
- else {
- /* Invalid CRC or length */
- fprintf(stderr,
- "\n%s: %s: invalid compressed data--crc/length error\n",
- program_name, ifname);
+ }
+ else
+ {
+ fprintf (stderr,
+ "\n%s: %s: invalid compressed data--crc/length error\n",
+ program_name, ifname);
err = ERROR;
- }
- }
-
- else {
- /* Check the crc and length in the gzip trailer or zip local header */
- if (!pkzip) {
- /* crc32 (see algorithm.doc)
- * uncompressed input size modulo 2^32
- */
- for (n = 0; n < 8; n++) {
- buf[n] = (uch)get_byte(); /* may cause an error if EOF */
- }
- orig_crc = LG(buf);
- orig_len = LG(buf+4);
+ }
+ }
+ else
+ {
+ /* Check crc and length in the gzip trailer or zip local header. */
+ if (!pkzip)
+ {
+ /* crc32 (see algorithm.doc)
+ Uncompressed input size modulo 2^32. */
+ for (n = 0; n < 8; n++)
+ buf[n] = (uch) get_byte (); /* May cause an error if EOF. */
+ orig_crc = LG (buf);
+ orig_len = LG (buf + 4);
len &= 0xffffffff; /* compare modulo 2^32 */
- }
+ }
n = 0;
/* Validate decompression */
- if (orig_crc != crc) {
- fprintf(stderr,
- "\n%s: %s: invalid compressed data--crc error\n",
- program_name, ifname);
+ if (orig_crc != crc)
+ {
+ fprintf (stderr,
+ "\n%s: %s: invalid compressed data--crc error\n",
+ program_name, ifname);
err = ERROR;
- }
- if ((off_t)orig_len != len) {
- fprintf(stderr,
- "\n%s: %s: invalid compressed data--length error\n",
- program_name, ifname);
+ }
+ if ((off_t) orig_len != len)
+ {
+ fprintf (stderr,
+ "\n%s: %s: invalid compressed data--length error\n",
+ program_name, ifname);
err = ERROR;
- }
+ }
}
/* Check if there are more entries in a pkzip file */
- if (pkzip) {
- while (n < 4) {
- i = try_byte();
+ if (pkzip)
+ {
+ while (n < 4)
+ {
+ int i = try_byte ();
if (i == EOF)
- break;
- buf[n++] = (uch)i;
- }
- if (n >= 4 && LG(buf) == LOCSIG) {
- /* There are more entries */
- if (to_stdout) {
- WARN((stderr,
- "%s: %s has more than one entry--rest ignored\n",
- program_name, ifname));
- } else {
- /* Don't destroy the input zip file */
- fprintf(stderr,
- "%s: %s has more than one entry -- unchanged\n",
- program_name, ifname);
+ break;
+ buf[n++] = (uch) i;
+ }
+ if (n >= 4 && LG (buf) == LOCSIG)
+ {
+ /* There are more entries. */
+ if (to_stdout)
+ WARN ((stderr,
+ "%s: %s has more than one entry--rest ignored\n",
+ program_name, ifname));
+ else
+ {
+ /* Don't destroy the input zip file. */
+ fprintf (stderr,
+ "%s: %s has more than one entry -- unchanged\n",
+ program_name, ifname);
err = ERROR;
- }
- }
- }
+ }
+ }
+ }
ext_header = pkzip = 0; /* for next file */
unzip_crc = orig_crc;
--
2.48.1
From 9b6ce179facd570b1204568d6d5921e1ba805883 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 12:33:39 -0700
Subject: [PATCH 08/10] unzip: add regression test for recent fixes
* tests/unzip-valid: New test, taken from Bug#78799.
* tests/Makefile.am (TESTS): Add it.
---
tests/Makefile.am | 1 +
tests/unzip-valid | 61 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 62 insertions(+)
create mode 100755 tests/unzip-valid
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 878e132..bbe55d9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -36,6 +36,7 @@ TESTS = \
unpack-invalid \
unpack-valid \
upper-suffix \
+ unzip-valid \
write-error \
z-suffix \
zdiff \
diff --git a/tests/unzip-valid b/tests/unzip-valid
new file mode 100755
index 0000000..1f00ea8
--- /dev/null
+++ b/tests/unzip-valid
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Test reading of PKZIP data descriptors.
+
+# Copyright 2025 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+. "${srcdir=.}/init.sh"; path_prepend_ ..
+
+printf '%s\n' \
+'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'\
+' Quisque tempor suscipit quam.'\
+' Aenean est lorem, luctus commodo massa quis, egestas molestie nisl.'\
+' Maecenas ac erat tempor, malesuada dolor ut, ultricies diam.'\
+' Suspendisse eget augue eget neque consectetur pellentesque sed sed massa.'\
+' In hac habitasse platea dictumst.' >exp || framework_failure_
+hex_printf_ \
+'\x50\x4b\x03\x04\x2d\x00\x08\x00\x08\x00\x6e\x63\xcf\x5a\x00\x00'\
+'\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x05\x00\x1d\x00\x6c\x6f'\
+'\x72\x65\x6d\x01\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'\
+'\x00\x00\x00\x00\x00\x00\x00\x55\x54\x05\x00\x01\xa0\x1e\x4f\x68'\
+'\x4d\x8f\x4d\x6a\xc5\x40\x0c\x83\xf7\x3d\x85\x0e\x10\x72\x87\x2e'\
+'\x0b\xed\xa2\xf4\x04\xee\x8c\xc8\x33\xcc\xdf\x8b\xed\xfb\xd7\x09'\
+'\x6f\xd1\xc5\x80\x19\x4b\xd6\xa7\xcf\x79\xb2\x43\x97\x45\x47\x9d'\
+'\x6d\x9e\x30\x75\x48\xa7\x6f\x28\x73\x18\x8b\xd3\xe3\x84\x54\x5d'\
+'\x6a\x45\xc7\x01\x36\xf5\x1d\xdf\xa1\xf6\x0c\xc2\xd9\xd7\xe5\x8a'\
+'\x5c\xae\xb4\x3e\x43\xfa\x8e\x77\x0e\xca\x00\xcd\xd1\xae\x84\x0d'\
+'\x2d\x8a\x87\xe5\xcd\xde\x67\x9d\xe8\x62\x26\x29\x56\xdb\xc0\x23'\
+'\x75\x62\xe8\xb3\xe5\xa0\xc4\x50\x6b\x3b\xbe\x84\x85\x23\xff\xa5'\
+'\x80\xa7\xf8\x2b\x6a\x4b\x6f\xea\x42\xaa\xbc\x88\x23\x59\xa3\xf9'\
+'\xa9\x45\x69\xa8\x7a\x01\xfc\x84\x2d\x8e\xaa\x66\xbc\xee\x67\xa5'\
+'\x38\xe2\x35\x0e\x5e\xe0\xff\xdb\x2d\xb6\xc6\xe1\xbc\x1b\x19\xeb'\
+'\xfd\x6e\xc4\x1d\x1f\x03\x8f\x24\x78\xc8\xaf\x26\x64\x9e\x5b\x4d'\
+'\x9c\x99\xad\x59\xa8\x9b\xef\x6f\x7f\x50\x4b\x07\x08\x1e\xf0\xbc'\
+'\x58\xc9\x00\x00\x00\x00\x00\x00\x00\x44\x01\x00\x00\x00\x00\x00'\
+'\x00\x50\x4b\x01\x02\x2d\xff\x2d\x00\x08\x00\x08\x00\x6e\x63\xcf'\
+'\x5a\x1e\xf0\xbc\x58\xc9\x00\x00\x00\x44\x01\x00\x00\x05\x00\x09'\
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x6c'\
+'\x6f\x72\x65\x6d\x55\x54\x05\x00\x01\xa0\x1e\x4f\x68\x50\x4b\x05'\
+'\x06\x00\x00\x00\x00\x01\x00\x01\x00\x3c\x00\x00\x00\x21\x01\x00'\
+'\x00\x00\x00' > test.gz || framework_failure_
+
+fail=0
+gzip -dc test.gz > out 2> err || fail=1
+
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
--
2.48.1
From a5f00e1bd550a195df6a8de7f97b2ab0f9bc7aeb Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 30 May 2025 12:01:56 -0700
Subject: [PATCH 09/10] =?UTF-8?q?gzip:=20don=E2=80=99t=20assume=20EOF=20?=
=?UTF-8?q?=3D=3D=20-1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Although EOF == -1 on all known platforms, POSIX and C don’t
guarantee it. Fix code that silently assumes this.
* deflate.c (lm_init, fill_window):
* gzip.c (get_method):
* util.c (fill_inbuf):
Don’t assume EOF == -1, or that converting EOF to unsigned
and back to int yields -1. Instead, statically check
EOF-related assumptions.
* deflate.c (fill_window): Check for more < EOF, not more != EOF.
---
deflate.c | 10 ++++++----
gzip.c | 6 +++++-
util.c | 5 +++++
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/deflate.c b/deflate.c
index aad8723..f3ed982 100644
--- a/deflate.c
+++ b/deflate.c
@@ -336,8 +336,9 @@ lm_init (int pack_level)
match_init(); /* initialize the asm code */
#endif
- lookahead = read_buf((char*)window,
- sizeof(int) <= 2 ? (unsigned)WSIZE : 2*WSIZE);
+#define READ_BUF_SIZE (sizeof (int) <= 2 ? (unsigned) WSIZE : 2 * WSIZE)
+ static_assert (READ_BUF_SIZE < (unsigned) EOF);
+ lookahead = read_buf ((char *) window, READ_BUF_SIZE);
if (lookahead == 0 || lookahead == (unsigned)EOF) {
eofile = 1, lookahead = 0;
@@ -537,11 +538,12 @@ fill_window ()
/* If the window is almost full and there is insufficient lookahead,
* move the upper half to the lower one to make room in the upper half.
*/
- if (more == (unsigned)EOF) {
+ if ((unsigned) EOF <= more) {
/* Very unlikely, but possible on 16 bit machine if strstart == 0
* and lookahead == 1 (input done one byte at time)
*/
- more--;
+ static_assert (2 < (unsigned) EOF);
+ more = (unsigned) EOF - 1;
} else if (strstart >= WSIZE+MAX_DIST) {
/* By the IN assertion, the window is not empty so we can't confuse
* more == 0 with more == 64K on a 16 bit machine.
diff --git a/gzip.c b/gzip.c
index 43146c2..0bfd197 100644
--- a/gzip.c
+++ b/gzip.c
@@ -1461,7 +1461,11 @@ get_method (int in, bool first)
magic[0] = imagic0;
imagic1 = try_byte ();
magic[1] = imagic1;
- /* If try_byte returned EOF, magic[1] == (char) EOF. */
+ /* If try_byte returned EOF, magic[1] == (char) EOF.
+ Although POSIX says this could cause gzip to trap
+ if EOF < CHAR_MIN < 0, no known platform is like that;
+ check to be safe. */
+ static_assert (! (EOF < CHAR_MIN && CHAR_MIN < 0));
} else {
magic[0] = get_byte ();
imagic0 = 0;
diff --git a/util.c b/util.c
index 1496e43..bbec0f4 100644
--- a/util.c
+++ b/util.c
@@ -103,6 +103,11 @@ void clear_bufs()
bytes_in = bytes_out = 0L;
}
+/* fill_inbuf callers convert EOF to unsigned and back.
+ Although this is a no-op on all known platforms, C does not guarantee it.
+ Add a static check to be safer. */
+static_assert ((int) (unsigned) EOF == EOF);
+
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty.
* EOF_OK is set if EOF acceptable as a result.
--
2.48.1
From 7bd69d3e7ab8323d05f480ef2b459a04421eb5ab Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 16 Jun 2025 12:54:51 -0700
Subject: [PATCH 10/10] maint: add NEWS entry re PKZIP fixes
---
NEWS | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/NEWS b/NEWS
index 9be1a87..6cca400 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@ GNU gzip NEWS -*- outline -*-
** Bug fixes
+ gzip -d no longer rejects PKZIP signatures, local headers, and data
+ descriptors. These can appear in well-formed albeit unusual pigz output.
+ [bug present since the beginning]
+
A use of uninitialized memory on some malformed inputs has been fixed.
[bug present since the beginning]
--
2.48.1