Subject: gzip: Decompression performance improvement Package: gzip Version: 1.3.12-2ubuntu1 Severity: wishlist
*** Please type your report below this line *** I think Jean-loup Gailly's emphasis with gzip is for portability. As such, Mark Adler's performance changes in zlib-1.2.0 haven't been ported across. From the zlib-1.2.3 ChangeLog: Changes in 1.2.0 (9 March 2003) - New and improved inflate code - About 20% faster - Does not allocate 32K window unless and until needed ... - New and improved crc32() - About 50% faster OpenBSD 4.2 compress uses zlib; testing with the 120 MB OpenBSD source archive, a quick & dirty compress port on Ubuntu 7.10 x86_64 gave ~25% decompression speed improvement. Assuming that this compress port tests as compatible with Debian ncompress, please consider using it as the gzip decompressor for gunzip, zcat et al., which may require patching other utilities to use it rather than gzip -d. See patches at end. $ uname -a Linux kefk 2.6.22-14-generic #1 SMP Sun Oct 14 21:45.. 2007 x86_64 GNU/Linux model name : AMD Athlon(tm) 64 X2 Dual Core Processor 3800+ $ bc scale=1 (5.267-3.714)*100/5.267 29.4 # => 29.4% reduction in Elapsed decompression time (5.176-3.716)*100/5.176 28.2 # => 28.2% reduction in User decompression time Script started on Thu 01 Nov 2007 22:12:53 EST $ ls -l -color=never file-contents.db /media/cdrom/src.tar.gz -rw------- 1 rdb rdb 543117312 2007-10-28 22:20 file-contents.db -rw-r--r-- 1 root root 125071304 2007-08-30 14:25 /media/cdrom/src.tar.gz $ md5sum file-contents.db !ls:$ md5sum file-contents.db /media/cdrom/src.tar.gz b18c606aed1144233647c63578145182 file-contents.db b2ae78abe02001311e4ed2bf5a37dd1e /media/cdrom/src.tar.gz $ gzip -dc !$ | md5sum gzip -dc /media/cdrom/src.tar.gz | md5sum 0fa5d307fb57eb59e992a266a86175bb - $ ./compress -dc !:2-$ ./compress -dc /media/cdrom/src.tar.gz | md5sum 0fa5d307fb57eb59e992a266a86175bb - $ for i in 1 2 3 4 > do > time gzip -dc /media/cdrom/src.tar.gz >> /dev/null > time ./compress -dc /media/cdrom/src.tar.gz >> /dev/null > done real 0m5.267s user 0m5.184s sys 0m0.080s real 0m3.714s user 
0m3.628s sys 0m0.080s real 0m5.280s user 0m5.204s sys 0m0.068s real 0m3.761s user 0m3.716s sys 0m0.036s real 0m5.289s user 0m5.216s sys 0m0.064s real 0m3.755s user 0m3.676s sys 0m0.072s real 0m5.283s user 0m5.176s sys 0m0.096s real 0m3.733s user 0m3.628s sys 0m0.100s $ time gzip -9 file-contents.db real 0m51.545s user 0m38.362s sys 0m3.016s $ ls -l file-contents.db.gz -rw------- 1 rdb rdb 478499480 2007-10-28 22:20 file-contents.db.gz $ time ./compress -d !$ time ./compress -d file-contents.db.gz real 0m17.828s user 0m4.224s sys 0m2.836s $ md5sum file-contents.db b18c606aed1144233647c63578145182 file-contents.db $ time ./compress -9 !$ time ./compress -9 file-contents.db real 0m51.604s user 0m37.830s sys 0m3.180s $ ls -l !$ ls -l file-contents.db.gz -rw------- 1 rdb rdb 478854228 2007-10-28 22:20 file-contents.db.gz $ !tim time gzip -d file-contents.db.gz real 0m19.535s user 0m6.620s sys 0m2.924s $ md5sum file-contents.db b18c606aed1144233647c63578145182 file-contents.db $ gzip --version gzip 1.3.12 Copyright (C) 2007 Free Software Foundation, Inc. Copyright (C) 1993 Jean-loup Gailly. This is free software. You may redistribute copies of it under the terms of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>. There is NO WARRANTY, to the extent permitted by law. Written by Jean-loup Gailly. $ ./compress -h usage: compress [-123456789cdfghLlNnOqrtVv] [-b bits] [-o filename] [-S suffix] [file ...] $ ./compress -V $OpenBSD: main.c,v 1.70 2007/04/04 13:29:45 millert Exp $ $OpenBSD: gzopen.c,v 1.24 2007/03/19 13:02:18 pedro Exp $ $OpenBSD: zopen.c,v 1.17 2005/08/25 17:07:56 millert Exp $ $OpenBSD: nullopen.c,v 1.3 2005/06/26 18:20:26 otto Exp $ Patches against current OpenBSD CVS compress sources [EMAIL PROTECTED]:/cvs cvs co src/usr.bin/compress The BSD makefile, builds with pmake, but -Werror breaks the build and main.c needs -D_GNU_SOURCE on linux, so I lazily compiled it and linked by hand. 
gcc -D_GNU_SOURCE -g -c main.c gcc -D_GNU_SOURCE -g -o compress main.o zopen.o gzopen.o nullopen.o -lz --- tmp/compress/compress.h 2005-06-27 04:20:26.000000000 +1000 +++ compress/compress.h 2007-10-28 12:59:46.000000000 +1100 @@ -80,3 +80,7 @@ extern int null_close(void *, struct z_i extern int null_flush(void *, int); extern void setfile(const char *, int, struct stat *); + +#ifndef EFTYPE /* Inappropriate file type or format */ +#define EFTYPE ENOTEMPTY +#endif --- tmp/compress/gzopen.c 2007-03-20 00:02:18.000000000 +1100 +++ compress/gzopen.c 2007-10-28 15:54:46.000000000 +1100 @@ -74,6 +74,16 @@ const char gz_rcsid[] = #include <zlib.h> #include "compress.h" +#ifdef linux +#include <endian.h> +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define htole32(x) (x) +#endif +#if __BYTE_ORDER == __BIG_ENDIAN +#define htole32 __swap32 +#endif +#endif + /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ --- tmp/compress/main.c 2007-04-04 23:29:45.000000000 +1000 +++ compress/main.c 2007-11-01 22:05:01.000000000 +1100 @@ -54,6 +54,9 @@ static const char main_rcsid[] = "$OpenB #include <unistd.h> #include <fcntl.h> #include <paths.h> +#if defined(linux) +#include <time.h> +#endif #include "compress.h" #define min(a,b) ((a) < (b)? (a) : (b)) @@ -92,6 +95,65 @@ const struct compressor null_method = { "null", ".nul", "XX", null_open, null_read, null_write, null_close }; #endif /* SMALL */ +#define HAVE_FCHFLAGS +#if defined(linux) +#undef HAVE_FCHFLAGS +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +#define __dead __attribute__((__noreturn__)) +#endif +/* $OpenBSD: strlcpy.c,v 1.11 2006/05/05 15:27:38 millert Exp $ */ + +/* + * Copyright (c) 1998 Todd C. 
Miller <[EMAIL PROTECTED]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} +#endif + int permission(const char *); __dead void usage(int); int docompress(const char *, char *, const struct compressor *, @@ -676,10 +738,15 @@ dodecompress(const char *in, char *out, } if (storename && !cat) { if (info.mtime != 0) { +#if !defined(linux) sb->st_mtimespec.tv_sec = sb->st_atimespec.tv_sec = info.mtime; sb->st_mtimespec.tv_nsec = sb->st_atimespec.tv_nsec = 0; +#else + sb->st_mtim.tv_sec = sb->st_atim.tv_sec = info.mtime; + sb->st_mtim.tv_nsec = sb->st_atim.tv_nsec = 0; +#endif } else storename = 0; /* no timestamp to restore */ } @@ -744,11 +811,18 @@ setfile(const char *name, int fd, struct if (fchmod(fd, fs->st_mode)) warn("fchmod: %s", name); +#if defined(HAVE_FCHFLAGS) if (fs->st_flags && fchflags(fd, fs->st_flags)) warn("fchflags: %s", name); +#endif +#if !defined(linux) TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec); TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec); +#else + TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atim); + TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtim); +#endif if (futimes(fd, tv)) warn("futimes: %s", name); } --- tmp/compress/zopen.c 2005-08-26 03:07:56.000000000 +1000 +++ compress/zopen.c 2007-10-28 12:58:44.000000000 +1100 @@ -753,8 +753,28 @@ zopen(const char *name, const char *mode close(fd); return NULL; } +#if !defined(linux) return funopen(cookie, (*mode == 'r'?zread:NULL), (*mode == 'w'?zwrite:NULL), NULL, zclose); +#else + { +#include <libio.h> + cookie_io_functions_t io_funct; + const char *open_mode; + + io_funct.close = zclose; + io_funct.seek = io_funct.read = 
io_funct.write = NULL; + if ('w' == *mode) { + io_funct.write = zwrite; + open_mode = "w"; + } + else { + io_funct.read = zread; + open_mode = "r"; + } + return fopencookie(cookie, open_mode, io_funct); + } +#endif } void * -- System Information: Debian Release: lenny/sid APT prefers gutsy-updates APT policy: (500, 'gutsy-updates'), (500, 'gutsy-security'), (500, 'gutsy') Architecture: amd64 (x86_64) Kernel: Linux 2.6.22-14-generic (SMP w/2 CPU cores) Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Versions of packages gzip depends on: ii debianutils 2.22.1 Miscellaneous utilities specific t ii libc6 2.6.1-1ubuntu9 GNU C Library: Shared libraries gzip recommends no packages. -- no debconf information
Subject: gzip: Decompression performance improvement Package: gzip Version: 1.3.12-2ubuntu1 Severity: wishlist *** Please type your report below this line *** I think Jean-loup Gailly's emphasis with gzip is for portability. As such, Mark Adler's performance changes in zlib-1.2.0 haven't been ported across. From the zlib-1.2.3 ChangeLog: Changes in 1.2.0 (9 March 2003) - New and improved inflate code - About 20% faster - Does not allocate 32K window unless and until needed ... - New and improved crc32() - About 50% faster OpenBSD 4.2 compress uses zlib; testing with the 120 MB OpenBSD source archive, a quick & dirty compress port on Ubuntu 7.10 x86_64 gave ~25% decompression speed improvement. Assuming that this compress port tests as compatible with Debian ncompress, please consider using it as the gzip decompressor for gunzip, zcat et al., which may require patching other utilities to use it rather than gzip -d. See patches at end. $ uname -a Linux kefk 2.6.22-14-generic #1 SMP Sun Oct 14 21:45.. 
2007 x86_64 GNU/Linux model name : AMD Athlon(tm) 64 X2 Dual Core Processor 3800+ $ bc scale=1 (5.267-3.714)*100/5.267 29.4 # => 29.4% reduction in Elapsed decompression time (5.176-3.716)*100/5.176 28.2 # => 28.2% reduction in User decompression time Script started on Thu 01 Nov 2007 22:12:53 EST $ ls -l -color=never file-contents.db /media/cdrom/src.tar.gz -rw------- 1 rdb rdb 543117312 2007-10-28 22:20 file-contents.db -rw-r--r-- 1 root root 125071304 2007-08-30 14:25 /media/cdrom/src.tar.gz $ md5sum file-contents.db !ls:$ md5sum file-contents.db /media/cdrom/src.tar.gz b18c606aed1144233647c63578145182 file-contents.db b2ae78abe02001311e4ed2bf5a37dd1e /media/cdrom/src.tar.gz $ gzip -dc !$ | md5sum gzip -dc /media/cdrom/src.tar.gz | md5sum 0fa5d307fb57eb59e992a266a86175bb - $ ./compress -dc !:2-$ ./compress -dc /media/cdrom/src.tar.gz | md5sum 0fa5d307fb57eb59e992a266a86175bb - $ for i in 1 2 3 4 > do > time gzip -dc /media/cdrom/src.tar.gz >> /dev/null > time ./compress -dc /media/cdrom/src.tar.gz >> /dev/null > done real 0m5.267s user 0m5.184s sys 0m0.080s real 0m3.714s user 0m3.628s sys 0m0.080s real 0m5.280s user 0m5.204s sys 0m0.068s real 0m3.761s user 0m3.716s sys 0m0.036s real 0m5.289s user 0m5.216s sys 0m0.064s real 0m3.755s user 0m3.676s sys 0m0.072s real 0m5.283s user 0m5.176s sys 0m0.096s real 0m3.733s user 0m3.628s sys 0m0.100s $ time gzip -9 file-contents.db real 0m51.545s user 0m38.362s sys 0m3.016s $ ls -l file-contents.db.gz -rw------- 1 rdb rdb 478499480 2007-10-28 22:20 file-contents.db.gz $ time ./compress -d !$ time ./compress -d file-contents.db.gz real 0m17.828s user 0m4.224s sys 0m2.836s $ md5sum file-contents.db b18c606aed1144233647c63578145182 file-contents.db $ time ./compress -9 !$ time ./compress -9 file-contents.db real 0m51.604s user 0m37.830s sys 0m3.180s $ ls -l !$ ls -l file-contents.db.gz -rw------- 1 rdb rdb 478854228 2007-10-28 22:20 file-contents.db.gz $ !tim time gzip -d file-contents.db.gz real 0m19.535s user 0m6.620s sys 
0m2.924s $ md5sum file-contents.db b18c606aed1144233647c63578145182 file-contents.db $ gzip --version gzip 1.3.12 Copyright (C) 2007 Free Software Foundation, Inc. Copyright (C) 1993 Jean-loup Gailly. This is free software. You may redistribute copies of it under the terms of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>. There is NO WARRANTY, to the extent permitted by law. Written by Jean-loup Gailly. $ ./compress -h usage: compress [-123456789cdfghLlNnOqrtVv] [-b bits] [-o filename] [-S suffix] [file ...] $ ./compress -V $OpenBSD: main.c,v 1.70 2007/04/04 13:29:45 millert Exp $ $OpenBSD: gzopen.c,v 1.24 2007/03/19 13:02:18 pedro Exp $ $OpenBSD: zopen.c,v 1.17 2005/08/25 17:07:56 millert Exp $ $OpenBSD: nullopen.c,v 1.3 2005/06/26 18:20:26 otto Exp $ Patches against current OpenBSD CVS compress sources [EMAIL PROTECTED]:/cvs cvs co src/usr.bin/compress The BSD makefile builds with pmake, but -Werror breaks the build and main.c needs -D_GNU_SOURCE on Linux, so I lazily compiled it and linked it by hand. 
gcc -D_GNU_SOURCE -g -c main.c gcc -D_GNU_SOURCE -g -o compress main.o zopen.o gzopen.o nullopen.o -lz --- tmp/compress/compress.h 2005-06-27 04:20:26.000000000 +1000 +++ compress/compress.h 2007-10-28 12:59:46.000000000 +1100 @@ -80,3 +80,7 @@ extern int null_close(void *, struct z_i extern int null_flush(void *, int); extern void setfile(const char *, int, struct stat *); + +#ifndef EFTYPE /* Inappropriate file type or format */ +#define EFTYPE ENOTEMPTY +#endif --- tmp/compress/gzopen.c 2007-03-20 00:02:18.000000000 +1100 +++ compress/gzopen.c 2007-10-28 15:54:46.000000000 +1100 @@ -74,6 +74,16 @@ const char gz_rcsid[] = #include <zlib.h> #include "compress.h" +#ifdef linux +#include <endian.h> +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define htole32(x) (x) +#endif +#if __BYTE_ORDER == __BIG_ENDIAN +#define htole32 __swap32 +#endif +#endif + /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ --- tmp/compress/main.c 2007-04-04 23:29:45.000000000 +1000 +++ compress/main.c 2007-11-01 22:05:01.000000000 +1100 @@ -54,6 +54,9 @@ static const char main_rcsid[] = "$OpenB #include <unistd.h> #include <fcntl.h> #include <paths.h> +#if defined(linux) +#include <time.h> +#endif #include "compress.h" #define min(a,b) ((a) < (b)? (a) : (b)) @@ -92,6 +95,65 @@ const struct compressor null_method = { "null", ".nul", "XX", null_open, null_read, null_write, null_close }; #endif /* SMALL */ +#define HAVE_FCHFLAGS +#if defined(linux) +#undef HAVE_FCHFLAGS +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +#define __dead __attribute__((__noreturn__)) +#endif +/* $OpenBSD: strlcpy.c,v 1.11 2006/05/05 15:27:38 millert Exp $ */ + +/* + * Copyright (c) 1998 Todd C. 
Miller <[EMAIL PROTECTED]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} +#endif + int permission(const char *); __dead void usage(int); int docompress(const char *, char *, const struct compressor *, @@ -676,10 +738,15 @@ dodecompress(const char *in, char *out, } if (storename && !cat) { if (info.mtime != 0) { +#if !defined(linux) sb->st_mtimespec.tv_sec = sb->st_atimespec.tv_sec = info.mtime; sb->st_mtimespec.tv_nsec = sb->st_atimespec.tv_nsec = 0; +#else + sb->st_mtim.tv_sec = sb->st_atim.tv_sec = info.mtime; + sb->st_mtim.tv_nsec = sb->st_atim.tv_nsec = 0; +#endif } else storename = 0; /* no timestamp to restore */ } @@ -744,11 +811,18 @@ setfile(const char *name, int fd, struct if (fchmod(fd, fs->st_mode)) warn("fchmod: %s", name); +#if defined(HAVE_FCHFLAGS) if (fs->st_flags && fchflags(fd, fs->st_flags)) warn("fchflags: %s", name); +#endif +#if !defined(linux) TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec); TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec); +#else + TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atim); + TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtim); +#endif if (futimes(fd, tv)) warn("futimes: %s", name); } --- tmp/compress/zopen.c 2005-08-26 03:07:56.000000000 +1000 +++ compress/zopen.c 2007-10-28 12:58:44.000000000 +1100 @@ -753,8 +753,28 @@ zopen(const char *name, const char *mode close(fd); return NULL; } +#if !defined(linux) return funopen(cookie, (*mode == 'r'?zread:NULL), (*mode == 'w'?zwrite:NULL), NULL, zclose); +#else + { +#include <libio.h> + cookie_io_functions_t io_funct; + const char *open_mode; + + io_funct.close = zclose; + io_funct.seek = io_funct.read = 
io_funct.write = NULL; + if ('w' == *mode) { + io_funct.write = zwrite; + open_mode = "w"; + } + else { + io_funct.read = zread; + open_mode = "r"; + } + return fopencookie(cookie, open_mode, io_funct); + } +#endif } void * -- System Information: Debian Release: lenny/sid APT prefers gutsy-updates APT policy: (500, 'gutsy-updates'), (500, 'gutsy-security'), (500, 'gutsy') Architecture: amd64 (x86_64) Kernel: Linux 2.6.22-14-generic (SMP w/2 CPU cores) Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Versions of packages gzip depends on: ii debianutils 2.22.1 Miscellaneous utilities specific t ii libc6 2.6.1-1ubuntu9 GNU C Library: Shared libraries gzip recommends no packages. -- no debconf information