pitrou commented on code in PR #48865:
URL: https://github.com/apache/arrow/pull/48865#discussion_r2694426413
##########
cpp/src/arrow/util/compression_test.cc:
##########
@@ -22,9 +22,11 @@
#include <ostream>
#include <random>
#include <string>
+#include <unordered_map>
#include <vector>
#include <gtest/gtest.h>
+#include <zstd.h>
Review Comment:
We can't include this unconditionally because zstd might not be present.
##########
cpp/src/arrow/util/compression_test.cc:
##########
@@ -447,9 +449,9 @@ TEST(TestCodecMisc, SpecifyCompressionLevel) {
}
TEST(TestCodecMisc, SpecifyCodecOptionsGZip) {
Review Comment:
There starts to be a lot of code duplication between all those codec options
tests, can you factor out a common helper method to run the actual tests?
##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +224,43 @@ class ZSTDCodec : public Codec {
int compression_level() const override { return compression_level_; }
private:
+ Result<CCtxPtr> createCCtx() const {
+ auto cctx = ZSTD_createCCtx();
+ auto ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
compression_level_);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+ }
+ for (auto& [key, value] : cctx_params_) {
+ ret = ZSTD_CCtx_setParameter(cctx, static_cast<ZSTD_cParameter>(key),
value);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+ }
+ }
+ return {{cctx, ZSTD_freeCCtx}};
+ }
+
+ Result<DCtxPtr> createDCtx() const {
+ auto dctx = ZSTD_createDCtx();
+ for (auto& [key, value] : dctx_params_) {
+ auto ret = ZSTD_DCtx_setParameter(dctx,
static_cast<ZSTD_dParameter>(key), value);
Review Comment:
Same here.
##########
cpp/src/parquet/column_writer_test.cc:
##########
@@ -703,6 +704,13 @@ TYPED_TEST(TestPrimitiveWriter,
RequiredPlainWithStatsAndZstdCompression) {
this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false,
true,
LARGE_SIZE);
}
+TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCodecOptions) {
+ auto codec_options = std::make_shared<::arrow::util::ZstdCodecOptions>();
+ codec_options->cctx_params = {{ZSTD_c_windowLog, 23}};
+ codec_options->dctx_params = {{ZSTD_d_windowLogMax, 23}};
+ this->TestRequiredWithCodecOptions(Encoding::PLAIN, Compression::GZIP,
false, false,
Review Comment:
You're compressing with GZIP and `ZstdCodecOptions`?
##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +224,43 @@ class ZSTDCodec : public Codec {
int compression_level() const override { return compression_level_; }
private:
+ Result<CCtxPtr> createCCtx() const {
+ auto cctx = ZSTD_createCCtx();
+ auto ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
compression_level_);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_CCtx create failed: ");
Review Comment:
This will leak the cctx on error, you should create the `std::unique_ptr`
before this.
##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +224,43 @@ class ZSTDCodec : public Codec {
int compression_level() const override { return compression_level_; }
private:
+ Result<CCtxPtr> createCCtx() const {
+ auto cctx = ZSTD_createCCtx();
+ auto ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
compression_level_);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+ }
+ for (auto& [key, value] : cctx_params_) {
+ ret = ZSTD_CCtx_setParameter(cctx, static_cast<ZSTD_cParameter>(key),
value);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_CCtx create failed: ");
+ }
+ }
+ return {{cctx, ZSTD_freeCCtx}};
+ }
+
+ Result<DCtxPtr> createDCtx() const {
+ auto dctx = ZSTD_createDCtx();
+ for (auto& [key, value] : dctx_params_) {
+ auto ret = ZSTD_DCtx_setParameter(dctx,
static_cast<ZSTD_dParameter>(key), value);
+ if (ZSTD_isError(ret)) {
+ return ZSTDError(ret, "ZSTD_DCtx create failed: ");
+ }
+ }
+ return {{dctx, ZSTD_freeDCtx}};
+ }
+
const int compression_level_;
+ const std::unordered_map<int, int> cctx_params_;
+ const std::unordered_map<int, int> dctx_params_;
};
} // namespace
-std::unique_ptr<Codec> MakeZSTDCodec(int compression_level) {
- return std::make_unique<ZSTDCodec>(compression_level);
+std::unique_ptr<Codec> MakeZSTDCodec(int compression_level,
+ std::unordered_map<int, int> cctx_params,
+ std::unordered_map<int, int> dctx_params)
{
+ return std::make_unique<ZSTDCodec>(compression_level, cctx_params,
dctx_params);
Review Comment:
Should move the params to avoid spurious copies.
##########
cpp/src/arrow/util/compression_test.cc:
##########
@@ -517,6 +519,40 @@ TEST(TestCodecMisc, SpecifyCodecOptionsBrotli) {
}
}
+TEST(TestCodecMisc, SpecifyCodecOptionsZstd) {
+ // for now only GZIP & Brotli & Zstd codec options supported, since it has
specific
+ // parameters to be customized, other codecs could directly go with
CodecOptions, could
+ // add more specific codec options if needed.
+ struct CombinationOption {
+ int level;
+ std::unordered_map<int, int> cctx_params;
+ std::unordered_map<int, int> dctx_params;
+ };
+ const CombinationOption combinations[] = {
+ {2, {}, {}},
+ {9, {}, {}},
+ {15, {}, {}},
+ {-992, {}, {}},
+ {3, {{ZSTD_c_windowLog, 23}}, {}},
+ {3, {{ZSTD_c_windowLog, 28}}, {{ZSTD_d_windowLogMax, 28}}}};
Review Comment:
Given that the ZSTD headers might not be there, we might have to hardcode
those options' numeric values instead.
##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -235,13 +224,43 @@ class ZSTDCodec : public Codec {
int compression_level() const override { return compression_level_; }
private:
+ Result<CCtxPtr> createCCtx() const {
Review Comment:
Let's use CamelCase as the rest of the codebase.
```suggestion
Result<CCtxPtr> CreateCCtx() const {
```
##########
cpp/src/arrow/util/compression.h:
##########
@@ -142,6 +143,15 @@ class ARROW_EXPORT BrotliCodecOptions : public
CodecOptions {
std::optional<int> window_bits;
};
+// ----------------------------------------------------------------------
+// Zstd codec options implementation
+
+class ARROW_EXPORT ZstdCodecOptions : public CodecOptions {
+ public:
+ std::unordered_map<int, int> cctx_params;
+ std::unordered_map<int, int> dctx_params;
Review Comment:
We don't need `std::unordered_map`, a `std::vector<std::pair<int, int>>`
would do IMHO.
##########
cpp/src/arrow/util/compression.h:
##########
@@ -142,6 +143,15 @@ class ARROW_EXPORT BrotliCodecOptions : public
CodecOptions {
std::optional<int> window_bits;
};
+// ----------------------------------------------------------------------
+// Zstd codec options implementation
+
+class ARROW_EXPORT ZstdCodecOptions : public CodecOptions {
+ public:
+ std::unordered_map<int, int> cctx_params;
+ std::unordered_map<int, int> dctx_params;
Review Comment:
Also let's call those something more explicit than "cctx" and "dctx".
##########
cpp/src/arrow/util/compression_zstd.cc:
##########
@@ -36,6 +37,9 @@ namespace internal {
namespace {
+using CCtxPtr = std::unique_ptr<ZSTD_CCtx, size_t (*)(ZSTD_CCtx*)>;
Review Comment:
We can probably make this more readable
```suggestion
using CCtxPtr = std::unique_ptr<ZSTD_CCtx, decltype(&ZSTD_freeCCtx)>;
```
##########
cpp/src/parquet/properties_test.cc:
##########
@@ -102,8 +107,14 @@ TEST(TestWriterProperties, SetCodecOptions) {
ASSERT_EQ(12, std::dynamic_pointer_cast<::arrow::util::GZipCodecOptions>(
props->codec_options(ColumnPath::FromDotString("gzip")))
->window_bits);
- ASSERT_EQ(Codec::UseDefaultCompressionLevel(),
+ ASSERT_EQ(3,
props->codec_options(ColumnPath::FromDotString("zstd"))->compression_level);
+ ASSERT_EQ(23, std::dynamic_pointer_cast<::arrow::util::ZstdCodecOptions>(
Review Comment:
I'm not sure these additional tests are useful. It if works for
`GZipCodecOptions`, then it will work for `ZstdCodecOptions` too.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]