lidavidm commented on a change in pull request #12673:
URL: https://github.com/apache/arrow/pull/12673#discussion_r831028851
##########
File path: cpp/src/parquet/properties.h
##########
@@ -171,64 +171,82 @@ class PARQUET_EXPORT WriterProperties {
created_by_(DEFAULT_CREATED_BY) {}
virtual ~Builder() {}
+ /// Specify the memory pool for the writer. Default default_memory_pool.
Builder* memory_pool(MemoryPool* pool) {
pool_ = pool;
return this;
}
+ /// Enable dictionary encoding in general for all columns. Default enabled.
Builder* enable_dictionary() {
default_column_properties_.set_dictionary_enabled(true);
return this;
}
+ /// Disable dictionary encoding in general for all columns. Default enabled.
Builder* disable_dictionary() {
default_column_properties_.set_dictionary_enabled(false);
return this;
}
+ /// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::string& path) {
dictionary_enabled_[path] = true;
return this;
}
+ /// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->enable_dictionary(path->ToDotString());
}
+ /// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::string& path) {
dictionary_enabled_[path] = false;
return this;
}
+ /// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->disable_dictionary(path->ToDotString());
}
+ /// Specify the dictionary page size limit per row group. Default 1MB.
Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
dictionary_pagesize_limit_ = dictionary_psize_limit;
return this;
}
+ /// Specify the write batch size while writing batches of Arrow values into Parquet.
+ /// Default 1024.
Builder* write_batch_size(int64_t write_batch_size) {
write_batch_size_ = write_batch_size;
return this;
}
+ /// Specify the max row group length.
+ /// Default 64M.
Builder* max_row_group_length(int64_t max_row_group_length) {
max_row_group_length_ = max_row_group_length;
return this;
}
+ /// Specify the data page size.
+ /// Default 1MB.
Builder* data_pagesize(int64_t pg_size) {
pagesize_ = pg_size;
return this;
}
+ /// Specify the data page version.
+ /// Default V1.
Builder* data_page_version(ParquetDataPageVersion data_page_version) {
data_page_version_ = data_page_version;
return this;
}
+ /// Specify the data page version.
Review comment:
Should this say "Parquet format version" rather than "data page version"?
##########
File path: cpp/src/parquet/properties.h
##########
@@ -351,40 +371,57 @@ class PARQUET_EXPORT WriterProperties {
return this->compression_level(path->ToDotString(), compression_level);
}
+ /// Define the file encryption properties.
+ /// Default NULL.
Builder* encryption(
std::shared_ptr<FileEncryptionProperties> file_encryption_properties) {
file_encryption_properties_ = std::move(file_encryption_properties);
return this;
}
+ /// Enable statistics in general.
+ /// Default enabled.
Builder* enable_statistics() {
default_column_properties_.set_statistics_enabled(true);
return this;
}
+ /// Disable statistics in general.
+ /// Default enabled.
Builder* disable_statistics() {
default_column_properties_.set_statistics_enabled(false);
return this;
}
+ /// Enable statistics for the column specified by `path`.
+ /// Default enabled.
Builder* enable_statistics(const std::string& path) {
statistics_enabled_[path] = true;
return this;
}
+ /// Enable statistics for the column specified by `path`.
+ /// Default enabled.
Builder* enable_statistics(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->enable_statistics(path->ToDotString());
}
+ /// Disable statistics for the column specified by `path`.
+ /// Default enabled.
Builder* disable_statistics(const std::string& path) {
statistics_enabled_[path] = false;
return this;
}
+ /// Disable statistics for the column specified by `path`.
+ /// Default enabled.
Builder* disable_statistics(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->disable_statistics(path->ToDotString());
}
+ /// \brief This function builds the WriterProperties according to the parameters of
+ /// builder.
Review comment:
nit: write the docstring in the imperative mood, e.g. 'Build the
WriterProperties with the builder parameters' or something like that.
##########
File path: cpp/src/parquet/properties.h
##########
@@ -171,64 +171,82 @@ class PARQUET_EXPORT WriterProperties {
created_by_(DEFAULT_CREATED_BY) {}
virtual ~Builder() {}
+ /// Specify the memory pool for the writer. Default default_memory_pool.
Builder* memory_pool(MemoryPool* pool) {
pool_ = pool;
return this;
}
+ /// Enable dictionary encoding in general for all columns. Default enabled.
Builder* enable_dictionary() {
default_column_properties_.set_dictionary_enabled(true);
return this;
}
+ /// Disable dictionary encoding in general for all columns. Default enabled.
Builder* disable_dictionary() {
default_column_properties_.set_dictionary_enabled(false);
return this;
}
+ /// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::string& path) {
dictionary_enabled_[path] = true;
return this;
}
+ /// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->enable_dictionary(path->ToDotString());
}
+ /// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::string& path) {
dictionary_enabled_[path] = false;
return this;
}
+ /// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>&
path) {
return this->disable_dictionary(path->ToDotString());
}
+ /// Specify the dictionary page size limit per row group. Default 1MB.
Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
dictionary_pagesize_limit_ = dictionary_psize_limit;
return this;
}
+ /// Specify the write batch size while writing batches of Arrow values into Parquet.
+ /// Default 1024.
Builder* write_batch_size(int64_t write_batch_size) {
write_batch_size_ = write_batch_size;
return this;
}
+ /// Specify the max row group length.
+ /// Default 64M.
Builder* max_row_group_length(int64_t max_row_group_length) {
max_row_group_length_ = max_row_group_length;
return this;
}
+ /// Specify the data page size.
+ /// Default 1MB.
Builder* data_pagesize(int64_t pg_size) {
pagesize_ = pg_size;
return this;
}
+ /// Specify the data page version.
+ /// Default V1.
Builder* data_page_version(ParquetDataPageVersion data_page_version) {
data_page_version_ = data_page_version;
return this;
}
+ /// Specify the data page version.
Review comment:
Should this say "Parquet file version" rather than "data page version"?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]