pitrou commented on code in PR #48865:
URL: https://github.com/apache/arrow/pull/48865#discussion_r2704899037


##########
cpp/src/arrow/util/compression_test.cc:
##########
@@ -484,37 +468,169 @@ TEST(TestCodecMisc, SpecifyCodecOptionsGZip) {
   }
 }
 
+TEST(TestCodecMisc, SpecifyCodecOptionsGZip) {
+  auto make_option = [](int compression_level, GZipFormat format,
+                        std::optional<int> window_bits) {
+    arrow::util::GZipCodecOptions option;
+    option.compression_level = compression_level;
+    option.gzip_format = format;
+    option.window_bits = window_bits;
+    return option;
+  };
+  const std::pair<arrow::util::GZipCodecOptions, bool> options[]{
+      {make_option(5, GZipFormat::GZIP, 15), true},
+      {make_option(9, GZipFormat::ZLIB, 12), true},
+      {make_option(-1, GZipFormat::DEFLATE, 10), true},
+      {make_option(10, GZipFormat::GZIP, 25), false},
+      {make_option(-992, GZipFormat::GZIP, 15), false},
+  };
+  CheckSpecifyCodecOptions<arrow::util::GZipCodecOptions>(Compression::GZIP, 
options);
+}
+
 TEST(TestCodecMisc, SpecifyCodecOptionsBrotli) {
-  // for now only GZIP & Brotli codec options supported, since it has specific 
parameters
-  // to be customized, other codecs could directly go with CodecOptions, could 
add more
-  // specific codec options if needed.
-  struct CombinationOption {
-    int level;
-    int window_bits;
-    bool expect_success;
+  auto make_option = [](int compression_level, std::optional<int> window_bits) 
{
+    arrow::util::BrotliCodecOptions option;
+    option.compression_level = compression_level;
+    option.window_bits = window_bits;
+    return option;
   };
-  constexpr CombinationOption combinations[] = {
-      {8, 22, true}, {11, 10, true}, {1, 24, true}, {5, -12, false}, {-992, 
25, false}};
+  const std::pair<arrow::util::BrotliCodecOptions, bool> options[]{
+      {make_option(8, 22), true},     {make_option(11, 10), true},
+      {make_option(1, 24), true},     {make_option(5, -12), false},
+      {make_option(-992, 25), false},
+  };
+  
CheckSpecifyCodecOptions<arrow::util::BrotliCodecOptions>(Compression::BROTLI, 
options);
+}
 
-  std::vector<uint8_t> data = MakeRandomData(2000);
-  for (const auto& combination : combinations) {
-    const auto compression = Compression::BROTLI;
-    if (!Codec::IsAvailable(compression)) {
-      // Support for this codec hasn't been built
-      continue;
+TEST(TestCodecMisc, SpecifyCodecOptionsZstd) {
+  auto make_option = [](int compression_level,
+                        std::vector<std::pair<int, int>> 
compression_context_params,
+                        std::vector<std::pair<int, int>> 
decompression_context_params) {
+    arrow::util::ZstdCodecOptions option;
+    option.compression_level = compression_level;
+    option.compression_context_params = std::move(compression_context_params);
+    option.decompression_context_params = 
std::move(decompression_context_params);
+    return option;
+  };
+  constexpr int ZSTD_c_windowLog = 101;
+  const std::pair<arrow::util::ZstdCodecOptions, bool> options[]{
+      {make_option(2, {}, {}), true},
+      {make_option(9, {}, {}), true},
+      {make_option(15, {}, {}), true},
+      {make_option(-992, {}, {}), true},
+      {make_option(3, {{ZSTD_c_windowLog, 23}}, {}), true},
+      {make_option(3, {{ZSTD_c_windowLog, 28}}, {}), true}};
+  CheckSpecifyCodecOptions<arrow::util::ZstdCodecOptions>(Compression::ZSTD, 
options);
+}
+
+TEST(TestCodecMisc, ZstdLargerWindowLog) {
+  constexpr int ZSTD_c_windowLog = 101;
+
+  arrow::util::ZstdCodecOptions option1;
+  arrow::util::ZstdCodecOptions option2;
+  option2.compression_context_params = {{ZSTD_c_windowLog, 28}};
+
+  std::vector<uint8_t> data = MakeRandomData(4 * 1024 * 1024);
+  data.reserve(data.size() * 2);
+  data.insert(data.end(), data.begin(), data.end());
+
+  auto compress = [&data](const arrow::util::ZstdCodecOptions& codecOption)

Review Comment:
   Nit: `codec_option` or `codec_options`



##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +226,47 @@ class ZSTDCodec : public Codec {
   int compression_level() const override { return compression_level_; }
 
  private:
+  Result<CCtxPtr> CreateCCtx() const {
+    CCtxPtr cctx{ZSTD_createCCtx(), ZSTD_freeCCtx};
+    auto ret =
+        ZSTD_CCtx_setParameter(cctx.get(), ZSTD_c_compressionLevel, 
compression_level_);
+    if (ZSTD_isError(ret)) {
+      return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+    }
+    for (auto& [key, value] : compression_context_params_) {
+      ret = ZSTD_CCtx_setParameter(cctx.get(), 
static_cast<ZSTD_cParameter>(key), value);
+      if (ZSTD_isError(ret)) {
+        return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+      }
+    }
+    return cctx;
+  }
+
+  Result<DCtxPtr> CreateDCtx() const {
+    DCtxPtr dctx{ZSTD_createDCtx(), ZSTD_freeDCtx};

Review Comment:
   Same question.



##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +226,47 @@ class ZSTDCodec : public Codec {
   int compression_level() const override { return compression_level_; }
 
  private:
+  Result<CCtxPtr> CreateCCtx() const {
+    CCtxPtr cctx{ZSTD_createCCtx(), ZSTD_freeCCtx};

Review Comment:
   Do we want to check that the context pointer is non-null or is that not 
necessary? (would `ZSTD_CCtx_setParameter` raise if given a null context?)



##########
cpp/src/arrow/util/compression_test.cc:
##########
@@ -446,36 +449,17 @@ TEST(TestCodecMisc, SpecifyCompressionLevel) {
   }
 }
 
-TEST(TestCodecMisc, SpecifyCodecOptionsGZip) {
-  // for now only GZIP & Brotli codec options supported, since it has specific 
parameters
-  // to be customized, other codecs could directly go with CodecOptions, could 
add more
-  // specific codec options if needed.
-  struct CombinationOption {
-    int level;
-    GZipFormat format;
-    int window_bits;
-    bool expect_success;
-  };
-  constexpr CombinationOption combinations[] = {{2, GZipFormat::ZLIB, 12, 
true},
-                                                {9, GZipFormat::GZIP, 9, true},
-                                                {9, GZipFormat::GZIP, 20, 
false},
-                                                {5, GZipFormat::DEFLATE, -12, 
false},
-                                                {-992, GZipFormat::GZIP, 15, 
false}};
-
+template <std::derived_from<arrow::util::CodecOptions> T>
+void CheckSpecifyCodecOptions(Compression::type compression,
+                              std::span<const std::pair<T, bool>> options) {
   std::vector<uint8_t> data = MakeRandomData(2000);
-  for (const auto& combination : combinations) {
-    const auto compression = Compression::GZIP;
+  for (const auto& [codec_option, expect_success] : options) {
     if (!Codec::IsAvailable(compression)) {
       // Support for this codec hasn't been built
       continue;

Review Comment:
   Since we are looking at this, can we use `GTEST_SKIP`?



##########
cpp/src/parquet/properties_test.cc:
##########
@@ -81,9 +81,13 @@ TEST(TestWriterProperties, AdvancedHandling) {
 }
 
 TEST(TestWriterProperties, SetCodecOptions) {
+  constexpr int ZSTD_c_windowLog = 101;

Review Comment:
   I agree we can just keep this in the tests.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to