alamb commented on code in PR #8554:
URL: https://github.com/apache/arrow-rs/pull/8554#discussion_r2406750206
##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -2611,4 +2616,96 @@ mod tests {
// error we want to reproduce.
let _result: Vec<_> = stream.try_collect().await.unwrap();
}
+
+ #[tokio::test]
+ async fn test_predicate_cache_disabled() {
+ let k = Int32Array::from_iter_values(0..10);
+ let data = RecordBatch::try_from_iter([("k", Arc::new(k) as
ArrayRef)]).unwrap();
+
+ let mut buf = Vec::new();
+ // both the page row limit and batch size are set to 1 to create one
page per row
+ let props = WriterProperties::builder()
+ .set_data_page_row_count_limit(1)
+ .set_write_batch_size(1)
+ .set_max_row_group_size(10)
+ .set_write_page_header_statistics(true)
+ .build();
+ let mut writer = ArrowWriter::try_new(&mut buf, data.schema(),
Some(props)).unwrap();
+ writer.write(&data).unwrap();
+ writer.close().unwrap();
+
+ let data = Bytes::from(buf);
+ let metadata = ParquetMetaDataReader::new()
+ .with_page_index_policy(PageIndexPolicy::Required)
+ .parse_and_finish(&data)
+ .unwrap();
+ let parquet_schema = metadata.file_metadata().schema_descr_ptr();
+
+ // the filter is not clone-able, so we use a lambda to simplify
Review Comment:
Yeah, this is something that makes the filters very tricky to handle
internally. Nothing to change for this PR — I am just observing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]