tustvold commented on code in PR #4713:
URL: https://github.com/apache/arrow-datafusion/pull/4713#discussion_r1056319292
##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -99,94 +100,114 @@ use super::metrics::ParquetFileMetrics;
///
/// So we can entirely skip rows 0->199 and 250->299 as we know they
/// can not contain rows that match the predicate.
-pub(crate) fn build_page_filter(
- pruning_predicate: Option<&PruningPredicate>,
- schema: SchemaRef,
- row_groups: &[usize],
- file_metadata: &ParquetMetaData,
- file_metrics: &ParquetFileMetrics,
-) -> Result<Option<RowSelection>> {
- // scoped timer updates on drop
- let _timer_guard = file_metrics.page_index_eval_time.timer();
- let page_index_predicates =
- extract_page_index_push_down_predicates(pruning_predicate, schema)?;
+#[derive(Debug)]
+pub(crate) struct PagePruningPredicate {
+ predicates: Vec<PruningPredicate>,
+}
- if page_index_predicates.is_empty() {
- return Ok(None);
+impl PagePruningPredicate {
+ /// Create a new [`PagePruningPredicate`]
+ pub fn try_new(expr: &Expr, schema: SchemaRef) -> Result<Self> {
+ let predicates = split_conjunction(expr)
+ .into_iter()
+ .filter_map(|predicate| match predicate.to_columns() {
+ Ok(columns) if columns.len() == 1 => {
+ match PruningPredicate::try_new(predicate.clone(),
schema.clone()) {
+ Ok(p) if !p.allways_true() => Some(Ok(p)),
Review Comment:
This happens because the expression gets simplified down to `true` and
therefore no longer refers to any columns 😅
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org