Let's try once more, patches inline (new email client, gah!) 0001-gzip-default-level-with-ENABLE_FEATURE_GZIP_LEVELS-s.patch
>From 9d06f01e2805a5d6f1d775ceb651ae18ae2e1808 Mon Sep 17 00:00:00 2001 From: Daniel Edgecumbe <[email protected]> Date: Mon, 2 Sep 2019 22:03:14 +0100 Subject: [PATCH 1/3] gzip: default level with ENABLE_FEATURE_GZIP_LEVELS should be 6 Fixes an off-by-one that actually resulted in level 7 being used --- archival/gzip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archival/gzip.c b/archival/gzip.c index 17341de45..37db347b8 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -2222,7 +2222,7 @@ int gzip_main(int argc UNUSED_PARAM, char **argv) #if ENABLE_FEATURE_GZIP_LEVELS opt >>= (BBUNPK_OPTSTRLEN IF_FEATURE_GZIP_DECOMPRESS(+ 2) + 1); /* drop cfkvq[dt]n bits */ if (opt == 0) - opt = 1 << 6; /* default: 6 */ + opt = 1 << 5; /* default: 6 */ opt = ffs(opt >> 4); /* Maps -1..-4 to [0], -5 to [1] ... -9 to [5] */ max_chain_length = 1 << gzip_level_config[opt].chain_shift; good_match = gzip_level_config[opt].good; -- 2.23.0 0002-gzip-set-compression-flags-correctly-as-per-standard.patch >From 4280c9633b359dcbf2ddadcf33790b8690f81c82 Mon Sep 17 00:00:00 2001 From: Daniel Edgecumbe <[email protected]> Date: Mon, 2 Sep 2019 22:05:26 +0100 Subject: [PATCH 2/3] gzip: set compression flags correctly as per standard With this change and CONFIG_GZIP_FAST=2, CONFIG_FEATURE_GZIP_LEVELS=y, GNU gzip and BusyBox gzip now produce identical output at each compression level (excluding 1..3, as BusyBox does not implement these levels). --- archival/gzip.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/archival/gzip.c b/archival/gzip.c index 37db347b8..f13748aa1 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -259,6 +259,7 @@ enum { #if !ENABLE_FEATURE_GZIP_LEVELS + comp_level = 9, max_chain_length = 4096, /* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. 
@@ -334,10 +335,12 @@ struct globals { #define head (G1.prev + WSIZE) /* hash head (see deflate.c) */ #if ENABLE_FEATURE_GZIP_LEVELS + unsigned comp_level; unsigned max_chain_length; unsigned max_lazy_match; unsigned good_match; unsigned nice_match; +#define comp_level (G1.comp_level) #define max_chain_length (G1.max_chain_length) #define max_lazy_match (G1.max_lazy_match) #define good_match (G1.good_match) @@ -1919,7 +1922,7 @@ static void bi_init(void) /* =========================================================================== * Initialize the "longest match" routines for a new file */ -static void lm_init(unsigned *flags16p) +static void lm_init(void) { unsigned j; @@ -1927,8 +1930,6 @@ static void lm_init(unsigned *flags16p) memset(head, 0, HASH_SIZE * sizeof(*head)); /* prev will be initialized on the fly */ - /* speed options for the general purpose bit flag */ - *flags16p |= 2; /* FAST 4, SLOW 2 */ /* ??? reduce max_chain_length for binary files */ //G1.strstart = 0; // globals are zeroed in pack_gzip() @@ -2076,10 +2077,16 @@ static void zip(void) bi_init(); ct_init(); - deflate_flags = 0; /* pkzip -es, -en or -ex equivalent */ - lm_init(&deflate_flags); + lm_init(); - put_16bit(deflate_flags | 0x300); /* extra flags. OS id = 3 (Unix) */ + deflate_flags = 0x300; /* extra flags. OS id = 3 (Unix) */ +#if ENABLE_FEATURE_GZIP_LEVELS + /* Note that comp_levels < 4 do not exist in this version of gzip */ + if (comp_level == 9) { + deflate_flags |= 0x02; /* SLOW flag */ + } +#endif + put_16bit(deflate_flags); /* The above 32-bit misaligns outbuf (10 bytes are stored), flush it */ flush_outbuf_if_32bit_optimized(); @@ -2224,6 +2231,9 @@ int gzip_main(int argc UNUSED_PARAM, char **argv) if (opt == 0) opt = 1 << 5; /* default: 6 */ opt = ffs(opt >> 4); /* Maps -1..-4 to [0], -5 to [1] ... 
-9 to [5] */ + + comp_level = opt + 4; + max_chain_length = 1 << gzip_level_config[opt].chain_shift; good_match = gzip_level_config[opt].good; max_lazy_match = gzip_level_config[opt].lazy2 * 2; -- 2.23.0 0003-gzip-set-default-compression-level-to-6-when-CONFIG_.patch >From 12d30559486502feec4e2821b3ab45ae6139e7aa Mon Sep 17 00:00:00 2001 From: Daniel Edgecumbe <[email protected]> Date: Mon, 2 Sep 2019 22:09:15 +0100 Subject: [PATCH 3/3] gzip: set default compression level to 6 when CONFIG_FEATURE_GZIP_LEVELS=n With this change, GNU gzip -n and BusyBox gzip now produce identical output assuming that CONFIG_GZIP_FAST=2. --- archival/gzip.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/archival/gzip.c b/archival/gzip.c index f13748aa1..779df5c19 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -52,7 +52,7 @@ aa: 85.1% -- replaced with aa.gz //config: help //config: Enable support for compression levels 4-9. The default level //config: is 6. If levels 1-3 are specified, 4 is used. -//config: If this option is not selected, -N options are ignored and -9 +//config: If this option is not selected, -N options are ignored and -6 //config: is used. //config: //config:config FEATURE_GZIP_DECOMPRESS @@ -259,13 +259,13 @@ enum { #if !ENABLE_FEATURE_GZIP_LEVELS - comp_level = 9, - max_chain_length = 4096, + comp_level = 6, + max_chain_length = 128, /* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. */ - max_lazy_match = 258, + max_lazy_match = 16, /* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. @@ -277,7 +277,7 @@ enum { * max_insert_length is used only for compression levels <= 3. 
*/ - good_match = 32, + good_match = 8, /* Use a faster search when the previous match is longer than this */ /* Values for max_lazy_match, good_match and max_chain_length, depending on @@ -286,7 +286,7 @@ enum { * found for specific files. */ - nice_match = 258, /* Stop searching when current match exceeds this */ + nice_match = 128, /* Stop searching when current match exceeds this */ /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 * For deflate_fast() (levels <= 3) good is ignored and lazy has a different * meaning. -- 2.23.0 On 02/09/2019 23.43, Daniel Edgecumbe wrote: > A discussion with eschwartz on the Arch Linux freenode IRC channel led to the > discovery of some minor implementation details lacking in the BusyBox gzip > applet which can cause output to differ both between GNU gzip and BusyBox and > across different versions of BusyBox. > > Please find attached three separate patches for the solution of these issues. > > I've also pushed the branch at > https://git.esotericnonsense.com/busybox.git/ > > This is a re-submission as my original e-mail was bounced due to not being an > ML member. > > > _______________________________________________ > busybox mailing list > [email protected] > http://lists.busybox.net/mailman/listinfo/busybox > -- Daniel Edgecumbe | esotericnonsense Kalix NO, Sverige | +358 46 584 2810 [email protected] | https://esotericnonsense.com _______________________________________________ busybox mailing list [email protected] http://lists.busybox.net/mailman/listinfo/busybox
