Ted-Jiang commented on code in PR #11483: URL: https://github.com/apache/datafusion/pull/11483#discussion_r1680620253
##########
datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs:
##########
@@ -146,52 +161,58 @@ impl PagePruningPredicate {
         }
         let page_index_predicates = &self.predicates;
-        let groups = file_metadata.row_groups();
+        let groups = parquet_metadata.row_groups();
         if groups.is_empty() {
             return access_plan;
         }
-        let (Some(file_offset_indexes), Some(file_page_indexes)) =
-            (file_metadata.offset_index(), file_metadata.column_index())
-        else {
-            trace!(
-                "skip page pruning due to lack of indexes. Have offset: {}, column index: {}",
-                file_metadata.offset_index().is_some(), file_metadata.column_index().is_some()
+        if parquet_metadata.offset_index().is_none()
+            || parquet_metadata.column_index().is_none()
+        {
+            debug!(
+                "Can not prune pages due to lack of indexes. Have offset: {}, column index: {}",
+                parquet_metadata.offset_index().is_some(), parquet_metadata.column_index().is_some()
             );
             return access_plan;
         };
         // track the total number of rows that should be skipped
         let mut total_skip = 0;
+        // for each row group specified in the access plan
         let row_group_indexes = access_plan.row_group_indexes();
-        for r in row_group_indexes {
+        for row_group_index in row_group_indexes {

Review Comment:
👍

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org