yjshen commented on a change in pull request #2048:
URL: https://github.com/apache/arrow-datafusion/pull/2048#discussion_r832248685
##########
File path: datafusion/src/physical_plan/file_format/parquet.rs
##########
@@ -418,31 +407,33 @@ impl<'a> PruningStatistics for
RowGroupPruningStatistics<'a> {
fn build_row_group_predicate(
pruning_predicate: &PruningPredicate,
metrics: ParquetFileMetrics,
- row_group_metadata: &[RowGroupMetaData],
-) -> Box<dyn Fn(&RowGroupMetaData, usize) -> bool> {
- let parquet_schema = pruning_predicate.schema().as_ref();
-
- let pruning_stats = RowGroupPruningStatistics {
- row_group_metadata,
- parquet_schema,
- };
- let predicate_values = pruning_predicate.prune(&pruning_stats);
-
- match predicate_values {
- Ok(values) => {
- // NB: false means don't scan row group
- let num_pruned = values.iter().filter(|&v| !*v).count();
- metrics.row_groups_pruned.add(num_pruned);
- Box::new(move |_, i| values[i])
- }
- // stats filter array could not be built
- // return a closure which will not filter out any row groups
- Err(e) => {
- debug!("Error evaluating row group predicate values {}", e);
- metrics.predicate_evaluation_errors.add(1);
- Box::new(|_r, _i| true)
- }
- }
+) -> Box<dyn FnMut(&RowGroupMetaData, usize) -> bool> {
+ let pruning_predicate = pruning_predicate.clone();
+ Box::new(
+ move |row_group_metadata: &RowGroupMetaData, _i: usize| -> bool {
+ let parquet_schema = pruning_predicate.schema().as_ref();
+ let pruning_stats = RowGroupPruningStatistics {
+ row_group_metadata,
+ parquet_schema,
+ };
+ let predicate_values = pruning_predicate.prune(&pruning_stats);
Review comment:
How about we change `ReadOptions` to carry the pruning predicates instead, like:
```rust
pub struct ReadOptions {
    predicates: Vec<Box<dyn Fn(&[RowGroupMetaData]) -> Vec<bool>>>,
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]