kszucs commented on code in PR #45360:
URL: https://github.com/apache/arrow/pull/45360#discussion_r2083091794


##########
cpp/src/parquet/properties.h:
##########
@@ -275,10 +305,38 @@ class PARQUET_EXPORT WriterProperties {
           page_checksum_enabled_(properties.page_checksum_enabled()),
           size_statistics_level_(properties.size_statistics_level()),
           sorting_columns_(properties.sorting_columns()),
-          default_column_properties_(properties.default_column_properties()) {}
+          default_column_properties_(properties.default_column_properties()),
+          content_defined_chunking_enabled_(
+              properties.content_defined_chunking_enabled()),
+          content_defined_chunking_options_(
+              properties.content_defined_chunking_options()) {}
 
     virtual ~Builder() {}
 
+    /// \brief EXPERIMENTAL: Use content-defined page chunking for all columns.
+    ///
+    /// Optimize parquet files for content addressable storage (CAS) systems by writing
+    /// data pages according to content-defined chunk boundaries. This allows for more
+    /// efficient deduplication of data across files, hence more efficient network
+    /// transfers and storage. The chunking is based on a rolling hash algorithm that
+    /// identifies chunk boundaries based on the actual content of the data.
+    Builder* enable_content_defined_chunking() {
+      content_defined_chunking_enabled_ = true;
+      return this;
+    }
+
+    /// \brief EXPERIMENTAL: Disable content-defined page chunking for all columns.
+    Builder* disable_content_defined_chunking() {
+      content_defined_chunking_enabled_ = false;
+      return this;
+    }
+
+    /// \brief EXPERIMENTAL: Specify content-defined chunking options, see CdcOptions.
+    Builder* content_defined_chunking_options(const CdcOptions options) {

Review Comment:
   Updated to take the options by const reference.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to