Let's try once more, patches inline (new email client, gah!)

0001-gzip-default-level-with-ENABLE_FEATURE_GZIP_LEVELS-s.patch

From 9d06f01e2805a5d6f1d775ceb651ae18ae2e1808 Mon Sep 17 00:00:00 2001
From: Daniel Edgecumbe <[email protected]>
Date: Mon, 2 Sep 2019 22:03:14 +0100
Subject: [PATCH 1/3] gzip: default level with ENABLE_FEATURE_GZIP_LEVELS
 should be 6

Fixes an off-by-one that actually resulted in level 7 being used
---
 archival/gzip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archival/gzip.c b/archival/gzip.c
index 17341de45..37db347b8 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -2222,7 +2222,7 @@ int gzip_main(int argc UNUSED_PARAM, char **argv)
 #if ENABLE_FEATURE_GZIP_LEVELS
        opt >>= (BBUNPK_OPTSTRLEN IF_FEATURE_GZIP_DECOMPRESS(+ 2) + 1); /* drop cfkvq[dt]n bits */
        if (opt == 0)
-               opt = 1 << 6; /* default: 6 */
+               opt = 1 << 5; /* default: 6 */
        opt = ffs(opt >> 4); /* Maps -1..-4 to [0], -5 to [1] ... -9 to [5] */
        max_chain_length = 1 << gzip_level_config[opt].chain_shift;
        good_match       = gzip_level_config[opt].good;
-- 
2.23.0

0002-gzip-set-compression-flags-correctly-as-per-standard.patch

From 4280c9633b359dcbf2ddadcf33790b8690f81c82 Mon Sep 17 00:00:00 2001
From: Daniel Edgecumbe <[email protected]>
Date: Mon, 2 Sep 2019 22:05:26 +0100
Subject: [PATCH 2/3] gzip: set compression flags correctly as per standard

With this change and CONFIG_GZIP_FAST=2, CONFIG_FEATURE_GZIP_LEVELS=y,
GNU gzip and BusyBox gzip now produce identical output at each compression
level (excluding 1..3, as BusyBox does not implement these levels).
---
 archival/gzip.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/archival/gzip.c b/archival/gzip.c
index 37db347b8..f13748aa1 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -259,6 +259,7 @@ enum {
 
 #if !ENABLE_FEATURE_GZIP_LEVELS
 
+       comp_level = 9,
        max_chain_length = 4096,
 /* To speed up deflation, hash chains are never searched beyond this length.
  * A higher limit improves compression ratio but degrades the speed.
@@ -334,10 +335,12 @@ struct globals {
 #define head (G1.prev + WSIZE) /* hash head (see deflate.c) */
 
 #if ENABLE_FEATURE_GZIP_LEVELS
+       unsigned comp_level;
        unsigned max_chain_length;
        unsigned max_lazy_match;
        unsigned good_match;
        unsigned nice_match;
+#define comp_level (G1.comp_level)
 #define max_chain_length (G1.max_chain_length)
 #define max_lazy_match   (G1.max_lazy_match)
 #define good_match      (G1.good_match)
@@ -1919,7 +1922,7 @@ static void bi_init(void)
 /* ===========================================================================
  * Initialize the "longest match" routines for a new file
  */
-static void lm_init(unsigned *flags16p)
+static void lm_init(void)
 {
        unsigned j;
 
@@ -1927,8 +1930,6 @@ static void lm_init(unsigned *flags16p)
        memset(head, 0, HASH_SIZE * sizeof(*head));
        /* prev will be initialized on the fly */
 
-       /* speed options for the general purpose bit flag */
-       *flags16p |= 2; /* FAST 4, SLOW 2 */
        /* ??? reduce max_chain_length for binary files */
 
        //G1.strstart = 0; // globals are zeroed in pack_gzip()
@@ -2076,10 +2077,16 @@ static void zip(void)
 
        bi_init();
        ct_init();
-       deflate_flags = 0;  /* pkzip -es, -en or -ex equivalent */
-       lm_init(&deflate_flags);
+       lm_init();
 
-       put_16bit(deflate_flags | 0x300); /* extra flags. OS id = 3 (Unix) */
+       deflate_flags = 0x300; /* extra flags. OS id = 3 (Unix) */
+#if ENABLE_FEATURE_GZIP_LEVELS
+       /* Note that comp_levels < 4 do not exist in this version of gzip */
+       if (comp_level == 9) {
+               deflate_flags |= 0x02; /* SLOW flag */
+       }
+#endif
+       put_16bit(deflate_flags);
 
        /* The above 32-bit misaligns outbuf (10 bytes are stored), flush it */
        flush_outbuf_if_32bit_optimized();
@@ -2224,6 +2231,9 @@ int gzip_main(int argc UNUSED_PARAM, char **argv)
        if (opt == 0)
                opt = 1 << 5; /* default: 6 */
        opt = ffs(opt >> 4); /* Maps -1..-4 to [0], -5 to [1] ... -9 to [5] */
+
+       comp_level = opt + 4;
+
        max_chain_length = 1 << gzip_level_config[opt].chain_shift;
        good_match       = gzip_level_config[opt].good;
        max_lazy_match   = gzip_level_config[opt].lazy2 * 2;
-- 
2.23.0

0003-gzip-set-default-compression-level-to-6-when-CONFIG_.patch

From 12d30559486502feec4e2821b3ab45ae6139e7aa Mon Sep 17 00:00:00 2001
From: Daniel Edgecumbe <[email protected]>
Date: Mon, 2 Sep 2019 22:09:15 +0100
Subject: [PATCH 3/3] gzip: set default compression level to 6 when
 CONFIG_FEATURE_GZIP_LEVELS=n

With this change, GNU gzip -n and BusyBox gzip now produce identical output
assuming that CONFIG_GZIP_FAST=2.
---
 archival/gzip.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/archival/gzip.c b/archival/gzip.c
index f13748aa1..779df5c19 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -52,7 +52,7 @@ aa:      85.1% -- replaced with aa.gz
 //config:      help
 //config:      Enable support for compression levels 4-9. The default level
 //config:      is 6. If levels 1-3 are specified, 4 is used.
-//config:      If this option is not selected, -N options are ignored and -9
+//config:      If this option is not selected, -N options are ignored and -6
 //config:      is used.
 //config:
 //config:config FEATURE_GZIP_DECOMPRESS
@@ -259,13 +259,13 @@ enum {
 
 #if !ENABLE_FEATURE_GZIP_LEVELS
 
-       comp_level = 9,
-       max_chain_length = 4096,
+       comp_level = 6,
+       max_chain_length = 128,
 /* To speed up deflation, hash chains are never searched beyond this length.
  * A higher limit improves compression ratio but degrades the speed.
  */
 
-       max_lazy_match = 258,
+       max_lazy_match = 16,
 /* Attempt to find a better match only when the current match is strictly
  * smaller than this value. This mechanism is used only for compression
  * levels >= 4.
@@ -277,7 +277,7 @@ enum {
  * max_insert_length is used only for compression levels <= 3.
  */
 
-       good_match = 32,
+       good_match = 8,
 /* Use a faster search when the previous match is longer than this */
 
 /* Values for max_lazy_match, good_match and max_chain_length, depending on
@@ -286,7 +286,7 @@ enum {
  * found for specific files.
  */
 
-       nice_match = 258,       /* Stop searching when current match exceeds this */
+       nice_match = 128,       /* Stop searching when current match exceeds this */
 /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
  * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
  * meaning.
-- 
2.23.0

On 02/09/2019 23.43, Daniel Edgecumbe wrote:
> A discussion with eschwartz on the Arch Linux freenode IRC channel led to the 
> discovery of some minor implementation details lacking in the BusyBox gzip 
> applet which can cause output to differ both between GNU gzip and BusyBox and
> between different versions of BusyBox.
> 
> Please find attached three separate patches that address these issues.
> 
> I've also pushed the branch at
> https://git.esotericnonsense.com/busybox.git/
> 
> This is a re-submission as my original e-mail was bounced due to not being an 
> ML member.
> 
> 
> _______________________________________________
> busybox mailing list
> [email protected]
> http://lists.busybox.net/mailman/listinfo/busybox
> 

-- 
Daniel Edgecumbe | esotericnonsense
Kalix NO, Sverige | +358 46 584 2810
[email protected] | https://esotericnonsense.com
_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to