XinyuZeng commented on a change in pull request #12673:
URL: https://github.com/apache/arrow/pull/12673#discussion_r830487794
##########
File path: cpp/src/parquet/properties.h
##########
@@ -176,59 +176,100 @@ class PARQUET_EXPORT WriterProperties {
return this;
}
+ /**
+ * Enable dictionary encoding in general for all columns. Default enabled.
+ */
Builder* enable_dictionary() {
default_column_properties_.set_dictionary_enabled(true);
return this;
}
+ /**
+ * Disable dictionary encoding in general for all columns. Default enabled.
+ */
Builder* disable_dictionary() {
default_column_properties_.set_dictionary_enabled(false);
return this;
}
+ /**
+ * Enable dictionary encoding for column specified by `path`. Default enabled.
+ */
Builder* enable_dictionary(const std::string& path) {
dictionary_enabled_[path] = true;
return this;
}
+ /**
+ * Enable dictionary encoding for column specified by `path`. Default enabled.
+ */
Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->enable_dictionary(path->ToDotString());
}
+ /**
+ * Disable dictionary encoding for column specified by `path`. Default enabled.
+ */
Builder* disable_dictionary(const std::string& path) {
dictionary_enabled_[path] = false;
return this;
}
+ /**
+ * Disable dictionary encoding for column specified by `path`. Default enabled.
+ */
Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->disable_dictionary(path->ToDotString());
}
+ /**
+ * Specify the dictionary page size limit per row group. Default 1MB.
+ */
Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
dictionary_pagesize_limit_ = dictionary_psize_limit;
return this;
}
+ /**
+ * Specify the write batch size while writing batches of Arrow values into Parquet.
+ * Default 1024.
+ */
Builder* write_batch_size(int64_t write_batch_size) {
write_batch_size_ = write_batch_size;
return this;
}
+ /**
+ * Specify the max row group length.
+ * Default 64M.
Review comment:
Perhaps this should be changed to 1M, as discussed in the user group a month
ago with @westonpace.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]