This is an automated email from the ASF dual-hosted git repository. apitrou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new c7cea527fe GH-47591: [C++] Fix passing zlib compression level (#47594) c7cea527fe is described below commit c7cea527fe9b2a18749e9485941ceca6111fbdde Author: Antoine Pitrou <anto...@python.org> AuthorDate: Thu Sep 18 10:14:22 2025 +0200 GH-47591: [C++] Fix passing zlib compression level (#47594) ### Rationale for this change When passing the compression level to the zlib initialization function `deflateInit2`, we were actually passing it to the wrong parameter (the `memLevel` parameter). As a consequence, changing the zlib/gzip "compression level" in the Arrow APIs had little effect on actual compressed size. ### What changes are included in this PR? Pass compression level correctly. ### Are these changes tested? They are exercised by regular CI tests. In addition, I tested manually that changing the compression level now affects compressed size accordingly. ### Are there any user-facing changes? Yes, this fixes behavior so as to match the documented semantics. * GitHub Issue: #47591 Authored-by: Antoine Pitrou <anto...@python.org> Signed-off-by: Antoine Pitrou <anto...@python.org> --- cpp/src/arrow/util/compression_zlib.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc index 157716c367..b06cf2d224 100644 --- a/cpp/src/arrow/util/compression_zlib.cc +++ b/cpp/src/arrow/util/compression_zlib.cc @@ -58,6 +58,16 @@ constexpr int GZIP_CODEC = 16; // Determine if this is libz or gzip from header. constexpr int DETECT_CODEC = 32; +// Default "memory level" +// +// Memory consumption when compressing is given by the formula: +// `(1 << (windowBits+2)) + (1 << (memLevel+9))` +// +// With windowBits=15 and memLevel=8 (default zlib values), 262 kB is used. 
+// +// (see `zconf.h` from zlib) +constexpr int kGzipDefaultMemLevel = 8; + constexpr int kGZipMinCompressionLevel = 1; constexpr int kGZipMaxCompressionLevel = 9; @@ -196,8 +206,8 @@ class GZipCompressor : public Compressor { int ret; // Initialize to run specified format int window_bits = CompressionWindowBitsForFormat(format, input_window_bits); - if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits, - compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) { + if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, window_bits, + kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != Z_OK) { return ZlibError("zlib deflateInit failed: "); } else { initialized_ = true; @@ -343,8 +353,8 @@ class GZipCodec : public Codec { int ret; // Initialize to run specified format int window_bits = CompressionWindowBitsForFormat(format_, window_bits_); - if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits, - compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) { + if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, window_bits, + kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != Z_OK) { return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg); } compressor_initialized_ = true;