sdf-jkl commented on code in PR #9118:
URL: https://github.com/apache/arrow-rs/pull/9118#discussion_r2723222314


##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -2462,4 +2474,225 @@ mod tests {
             assert_eq!(batch.column(0).as_string(), &expected);
         }
     }
+
+    /// Test that bitmask-based row selection correctly handles page boundaries.
+    /// This test creates a parquet file with multiple small pages and verifies that
+    /// when using Mask policy, pages that are skipped entirely are handled correctly.
+    #[tokio::test]
+    async fn test_bitmask_page_aware_selection_async() {
+        let first_value: i64 = 1111;
+        let last_value: i64 = 9999;
+        let num_rows: usize = 20;
+
+        // Create a file with 20 rows, ~2 rows per page = 10 pages
+        // Selection will be: first row, skip middle rows, last row
+        // This forces the reader to handle skipped pages correctly
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("key", DataType::Int64, false),
+            Field::new("value", DataType::Int64, false),
+        ]));
+
+        let mut int_values: Vec<i64> = (0..num_rows as i64).collect();
+        int_values[0] = first_value;
+        int_values[num_rows - 1] = last_value;
+        let keys = Int64Array::from(int_values.clone());
+        let values = Int64Array::from(int_values.clone());
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![Arc::new(keys) as ArrayRef, Arc::new(values) as ArrayRef],
+        )
+        .unwrap();
+
+        // Configure small pages to create multiple page boundaries
+        let props = WriterProperties::builder()
+            .set_write_batch_size(2)
+            .set_data_page_row_count_limit(2)
+            .build();

Review Comment:
   It's actually the same in the new test too...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to