mapleFU commented on code in PR #35886:
URL: https://github.com/apache/arrow/pull/35886#discussion_r1223885218


##########
cpp/src/parquet/column_io_benchmark.cc:
##########
@@ -40,8 +40,8 @@ std::shared_ptr<Int64Writer> BuildWriter(int64_t output_size,
                                          ColumnDescriptor* schema,
                                          const WriterProperties* properties,
                                          Compression::type codec) {
-  std::unique_ptr<PageWriter> pager =
-      PageWriter::Open(dst, codec, Codec::UseDefaultCompressionLevel(), 
metadata);
+    std::unique_ptr<PageWriter> pager =

Review Comment:
   fmt?



##########
cpp/src/arrow/util/compression.h:
##########
@@ -107,6 +107,46 @@ class ARROW_EXPORT Decompressor {
   // XXX add methods for buffer size heuristics?
 };
 
+/// \brief Compression codec options
+class ARROW_EXPORT CodecOptions {
+ public:
+  CodecOptions(int compression_level = kUseDefaultCompressionLevel) {
+    compression_level_ = compression_level;
+  }
+  virtual ~CodecOptions() = default;
+
+  int compression_level_;
+};
+
+// ----------------------------------------------------------------------
+// gzip codec options implementation
+
+struct GZipFormat {
+  enum type {
+    ZLIB,
+    DEFLATE,
+    GZIP,
+  };
+};
+
+constexpr int kGZipDefaultWindowBits = 15;
+
+class ARROW_EXPORT GZipCodecOptions : public CodecOptions {
+ public:
+  GZipFormat::type gzip_format = GZipFormat::GZIP;
+  int window_bits = kGZipDefaultWindowBits;
+};
+
+// ----------------------------------------------------------------------
+// brotli codec options implementation
+
+constexpr int kBrotliDefaultWindowBits = 22;

Review Comment:
   Can it use `BROTLI_DEFAULT_WINDOW`?



##########
cpp/src/parquet/properties.h:
##########
@@ -192,6 +197,8 @@ class PARQUET_EXPORT ColumnProperties {
 
   int compression_level() const { return compression_level_; }

Review Comment:
   Now we have a `compression_level` and a `CodecOptions`. Previously, user can 
set `compression_level`, and the compression level would be changed.
   
   After this patch, if user change the compression level, the codec will not 
be changed.
   
   Can we make `CodecOptions` mutate it's `compression_level`, and make 
`set_compression_level` set the level in `CodecOptions` directly?



##########
cpp/src/parquet/column_writer.cc:
##########
@@ -690,26 +690,40 @@ class BufferedPageWriter : public PageWriter {
 
 std::unique_ptr<PageWriter> PageWriter::Open(
     std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
-    int compression_level, ColumnChunkMetaDataBuilder* metadata,
-    int16_t row_group_ordinal, int16_t column_chunk_ordinal, MemoryPool* pool,
-    bool buffered_row_group, std::shared_ptr<Encryptor> meta_encryptor,
-    std::shared_ptr<Encryptor> data_encryptor, bool 
page_write_checksum_enabled,
-    ColumnIndexBuilder* column_index_builder, OffsetIndexBuilder* 
offset_index_builder) {
+    const std::shared_ptr<CodecOptions>& codec_options,
+    ColumnChunkMetaDataBuilder* metadata, int16_t row_group_ordinal,
+    int16_t column_chunk_ordinal, MemoryPool* pool, bool buffered_row_group,
+    std::shared_ptr<Encryptor> meta_encryptor, std::shared_ptr<Encryptor> 
data_encryptor,
+    bool page_write_checksum_enabled, ColumnIndexBuilder* column_index_builder,
+    OffsetIndexBuilder* offset_index_builder) {
   if (buffered_row_group) {
     return std::unique_ptr<PageWriter>(new BufferedPageWriter(
-        std::move(sink), codec, compression_level, metadata, row_group_ordinal,
+        std::move(sink), codec, *codec_options.get(), metadata, 
row_group_ordinal,

Review Comment:
   I think just `*codec_options` is ok?



##########
cpp/src/arrow/util/compression.h:
##########
@@ -107,6 +107,50 @@ class ARROW_EXPORT Decompressor {
   // XXX add methods for buffer size heuristics?
 };
 
+/// \brief Compression codec options
+class ARROW_EXPORT CodecOptions {
+ public:
+  CodecOptions(int compression_level = kUseDefaultCompressionLevel) {
+    compression_level_ = compression_level;
+  }
+  virtual ~CodecOptions() = default;
+
+  int compression_level_;
+};

Review Comment:
   I guess @yyang52 's idea is that `CodecOptions` can explicit built from 
`compression_level`, am I right?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to