alamb commented on code in PR #10802:
URL: https://github.com/apache/datafusion/pull/10802#discussion_r1627707292


##########
datafusion-examples/examples/parquet_index.rs:
##########
@@ -518,21 +518,17 @@ impl ParquetMetadataIndexBuilder {
 
         // extract the parquet statistics from the file's footer
         let metadata = reader.metadata();
+        let row_groups = metadata.row_groups();
 
         // Extract the min/max values for each row group from the statistics
-        let row_counts = StatisticsConverter::row_counts(reader.metadata())?;
-        let value_column_mins = StatisticsConverter::try_new(
+        let converter = StatisticsConverter::try_new(

Review Comment:
   This is a pretty good example of how the statistics API changed. FYI 
@NGA-TRAN 



##########
datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs:
##########
@@ -136,32 +133,35 @@ impl RowGroupSet {
         metrics: &ParquetFileMetrics,
     ) {
         assert_eq!(groups.len(), self.len());
-        for (idx, metadata) in groups.iter().enumerate() {
-            if !self.should_scan(idx) {
-                continue;
-            }
-            let pruning_stats = RowGroupPruningStatistics {
-                parquet_schema,
-                row_group_metadata: metadata,
-                arrow_schema,
-            };
-            match predicate.prune(&pruning_stats) {
-                Ok(values) => {
-                    // NB: false means don't scan row group
-                    if !values[0] {
+        // Indexes of row groups still to scan

Review Comment:
   Here is the change to prune all row groups with one call to
`PruningPredicate::prune` rather than one call per row group.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to