alamb commented on code in PR #18321:
URL: https://github.com/apache/datafusion/pull/18321#discussion_r2469206804
##########
datafusion-examples/examples/json_shredding.rs:
##########
@@ -142,7 +142,7 @@ async fn main() -> Result<()> {
.await?;
let plan = format!("{}",
arrow::util::pretty::pretty_format_batches(&batches)?);
println!("{plan}");
- assert_contains!(&plan, "row_groups_pruned_statistics=1");
+ assert_contains!(&plan, "row_groups_pruned_statistics=2 total → 1 matched");
Review Comment:
this is so much clearer ❤️
##########
datafusion/core/tests/parquet/mod.rs:
##########
@@ -126,79 +126,95 @@ struct TestOutput {
impl TestOutput {
/// retrieve the value of the named metric, if any
fn metric_value(&self, metric_name: &str) -> Option<usize> {
+ if let Some((pruned, _matched)) = self.pruning_metric(metric_name) {
+ return Some(pruned);
+ }
+
self.parquet_metrics
.sum(|metric| metric.value().name() == metric_name)
- .map(|v| v.as_usize())
- }
-
- /// The number of times the pruning predicate evaluation errors
- fn predicate_evaluation_errors(&self) -> Option<usize> {
- self.metric_value("predicate_evaluation_errors")
- }
-
- /// The number of row_groups matched by bloom filter
- fn row_groups_matched_bloom_filter(&self) -> Option<usize> {
- self.metric_value("row_groups_matched_bloom_filter")
- }
-
- /// The number of row_groups pruned by bloom filter
- fn row_groups_pruned_bloom_filter(&self) -> Option<usize> {
- self.metric_value("row_groups_pruned_bloom_filter")
- }
-
- /// The number of row_groups matched by statistics
- fn row_groups_matched_statistics(&self) -> Option<usize> {
- self.metric_value("row_groups_matched_statistics")
- }
-
- /// The number of row_groups pruned by statistics
- fn row_groups_pruned_statistics(&self) -> Option<usize> {
- self.metric_value("row_groups_pruned_statistics")
+ .map(|v| match v {
+ MetricValue::PruningMetrics {
+ pruning_metrics, ..
+ } => pruning_metrics.pruned(),
+ _ => v.as_usize(),
+ })
}
- /// Metric `files_ranges_pruned_statistics` tracks both pruned and matched
count,
- /// for testing purpose, here it only aggregate the `pruned` count.
- fn files_ranges_pruned_statistics(&self) -> Option<usize> {
+ fn pruning_metric(&self, metric_name: &str) -> Option<(usize, usize)> {
Review Comment:
I think it would help to document here what the two `usize` return values are
(total rows pruned and total rows matched)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]