alamb commented on code in PR #2942:
URL: https://github.com/apache/arrow-rs/pull/2942#discussion_r1007159436


##########
parquet/src/file/properties.rs:
##########
@@ -271,19 +289,38 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets data page size limit.
+    /// Sets data page size limit

Review Comment:
   ```suggestion
       /// Sets best effort maximum size of a data page in bytes
   ```



##########
parquet/src/file/properties.rs:
##########
@@ -271,19 +289,38 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets data page size limit.
+    /// Sets data page size limit
+    ///
+    /// Note: this is a best effort limit based on the write batch size
     pub fn set_data_pagesize_limit(mut self, value: usize) -> Self {
         self.data_pagesize_limit = value;
         self
     }
 
-    /// Sets dictionary page size limit.
+    /// Sets data page row count limit
+    ///
+    ///
+    /// This can be used to limit the number of rows within a page to
+    /// yield better page pruning
+    ///
+    /// Note: this is a best effort limit based on the write batch size
+    pub fn set_data_page_row_count_limit(mut self, value: usize) -> Self {
+        self.data_page_row_count_limit = value;
+        self
+    }
+
+    /// Sets dictionary page size limit

Review Comment:
   ```suggestion
       /// Sets best effort maximum dictionary page size, in bytes
   ```



##########
parquet/tests/arrow_writer_layout.rs:
##########
@@ -308,6 +308,34 @@ fn test_primitive() {
             }],
         },
     });
+
+    // Test row count limit
+    let props = WriterProperties::builder()
+        .set_dictionary_enabled(false)
+        .set_data_page_row_count_limit(100)
+        .set_write_batch_size(100)
+        .build();
+
+    do_test(LayoutTest {
+        props,
+        batches: vec![batch],
+        layout: Layout {
+            row_groups: vec![RowGroup {
+                columns: vec![ColumnChunk {
+                    pages: (0..20)
+                        .map(|_| Page {
+                            rows: 100,

Review Comment:
   Not bad!



##########
parquet/src/file/properties.rs:
##########
@@ -271,19 +289,38 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets data page size limit.
+    /// Sets data page size limit
+    ///
+    /// Note: this is a best effort limit based on the write batch size
     pub fn set_data_pagesize_limit(mut self, value: usize) -> Self {
         self.data_pagesize_limit = value;
         self
     }
 
-    /// Sets dictionary page size limit.
+    /// Sets data page row count limit

Review Comment:
   ```suggestion
       /// Sets best effort maximum number of rows in a data page
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to