tustvold commented on code in PR #5600:
URL: https://github.com/apache/arrow-rs/pull/5600#discussion_r1557409572


##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -1857,4 +1857,92 @@ mod tests {
             assert_eq!(total_rows, expected);
         }
     }
+
+    #[tokio::test]
+    async fn test_row_filter_nested() {
+        let a = StringArray::from_iter_values(["a", "b", "b", "b", "c", "c"]);
+        let b = StructArray::from(vec![
+            (
+                Arc::new(Field::new("aa", DataType::Utf8, true)),
+                Arc::new(StringArray::from(vec!["a", "b", "b", "b", "c", 
"c"])) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("bb", DataType::Utf8, true)),
+                Arc::new(StringArray::from(vec!["1", "2", "3", "4", "5", 
"6"])) as ArrayRef,
+            ),
+        ]);
+        let c = Int32Array::from_iter(0..6);
+        let data = RecordBatch::try_from_iter([
+            ("a", Arc::new(a) as ArrayRef),
+            ("b", Arc::new(b) as ArrayRef),
+            ("c", Arc::new(c) as ArrayRef),
+        ])
+        .unwrap();
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buf, data.schema(), 
None).unwrap();
+        writer.write(&data).unwrap();
+        writer.close().unwrap();
+
+        let data: Bytes = buf.into();
+        let metadata = parse_metadata(&data).unwrap();
+        let parquet_schema = metadata.file_metadata().schema_descr_ptr();
+
+        let test = TestReader {
+            data,
+            metadata: Arc::new(metadata),
+            requests: Default::default(),
+        };
+        let requests = test.requests.clone();
+
+        let a_scalar = StringArray::from_iter_values(["b"]);
+        let a_filter = ArrowPredicateFn::new(
+            ProjectionMask::leaves(&parquet_schema, vec![0]),
+            move |batch| eq(batch.column(0), &Scalar::new(&a_scalar)),
+        );
+
+        let b_scalar = StringArray::from_iter_values(["4"]);
+        let b_filter = ArrowPredicateFn::new(
+            ProjectionMask::leaves(&parquet_schema, vec![2]),
+            move |batch| {
+                // Filter on the second element of the struct.
+                let struct_array = batch
+                    .column(0)
+                    .as_any()
+                    .downcast_ref::<StructArray>()
+                    .unwrap();
+                eq(struct_array.column(0), &Scalar::new(&b_scalar))

Review Comment:
   I think the challenge here is when the nested arrays are either repeated or 
nullable; in such a case, trying to interpret the leaves in isolation isn't 
necessarily meaningful.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to