This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c7cea527fe GH-47591: [C++] Fix passing zlib compression level (#47594)
c7cea527fe is described below

commit c7cea527fe9b2a18749e9485941ceca6111fbdde
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Thu Sep 18 10:14:22 2025 +0200

    GH-47591: [C++] Fix passing zlib compression level (#47594)
    
    ### Rationale for this change
    
    When passing the compression level to the zlib initialization function 
`deflateInit2`, we were actually passing it as the wrong argument (the 
`memLevel` parameter instead of the `level` parameter).
    
    As a consequence, changing the zlib/gzip "compression level" in the Arrow 
APIs had little effect on actual compressed size.
    
    ### What changes are included in this PR?
    
    Pass compression level correctly.
    
    ### Are these changes tested?
    
    They are exercised by regular CI tests. In addition, I tested manually that 
changing the compression level now affects compressed size accordingly.
    
    ### Are there any user-facing changes?
    
    Yes, this fixes behavior so as to match the documented semantics.
    
    * GitHub Issue: #47591
    
    Authored-by: Antoine Pitrou <anto...@python.org>
    Signed-off-by: Antoine Pitrou <anto...@python.org>
---
 cpp/src/arrow/util/compression_zlib.cc | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/util/compression_zlib.cc 
b/cpp/src/arrow/util/compression_zlib.cc
index 157716c367..b06cf2d224 100644
--- a/cpp/src/arrow/util/compression_zlib.cc
+++ b/cpp/src/arrow/util/compression_zlib.cc
@@ -58,6 +58,16 @@ constexpr int GZIP_CODEC = 16;
 // Determine if this is libz or gzip from header.
 constexpr int DETECT_CODEC = 32;
 
+// Default "memory level"
+//
+// Memory consumption when compressing is given by the formula:
+// `(1 << (windowBits+2)) +  (1 << (memLevel+9))`
+//
+// With windowBits=15 and memLevel=8 (default zlib values), 262 kB is used.
+//
+// (see `zconf.h` from zlib)
+constexpr int kGzipDefaultMemLevel = 8;
+
 constexpr int kGZipMinCompressionLevel = 1;
 constexpr int kGZipMaxCompressionLevel = 9;
 
@@ -196,8 +206,8 @@ class GZipCompressor : public Compressor {
     int ret;
     // Initialize to run specified format
     int window_bits = CompressionWindowBitsForFormat(format, 
input_window_bits);
-    if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 
window_bits,
-                            compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
+    if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, 
window_bits,
+                            kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != 
Z_OK) {
       return ZlibError("zlib deflateInit failed: ");
     } else {
       initialized_ = true;
@@ -343,8 +353,8 @@ class GZipCodec : public Codec {
     int ret;
     // Initialize to run specified format
     int window_bits = CompressionWindowBitsForFormat(format_, window_bits_);
-    if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 
window_bits,
-                            compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
+    if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, 
window_bits,
+                            kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != 
Z_OK) {
       return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg);
     }
     compressor_initialized_ = true;

Reply via email to