This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a2b9ab82f8 Minor: add `with_estimated_selectivity ` to Precision (#8177)
a2b9ab82f8 is described below
commit a2b9ab82f8b9905e538f1b5ff27345998ab98fb9
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Nov 17 14:15:17 2023 -0500
Minor: add `with_estimated_selectivity ` to Precision (#8177)
* Minor: add apply_filter to Precision
* fix: use inexact
* Rename to with_estimated_selectivity
---
datafusion/common/src/stats.rs | 9 +++++++++
datafusion/physical-plan/src/filter.rs | 25 ++++++++-----------------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index 1c7a4fd4d5..7ad8992ca9 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -151,6 +151,15 @@ impl Precision<usize> {
(_, _) => Precision::Absent,
}
}
+
+ /// Return the estimate of applying a filter with estimated selectivity
+ /// `selectivity` to this Precision. A selectivity of `1.0` means that all
+ /// rows are selected. A selectivity of `0.5` means half the rows are
+ /// selected. Will always return inexact statistics.
+ pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
+ self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
+ .to_inexact()
+ }
}
impl Precision<ScalarValue> {
diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs
index 597e1d523a..107c95eff7 100644
--- a/datafusion/physical-plan/src/filter.rs
+++ b/datafusion/physical-plan/src/filter.rs
@@ -200,15 +200,12 @@ impl ExecutionPlan for FilterExec {
// assume filter selects 20% of rows if we cannot do anything smarter
// tracking issue for making this configurable:
// https://github.com/apache/arrow-datafusion/issues/8133
- let selectivity = 0.2_f32;
- let mut stats = input_stats.into_inexact();
- if let Precision::Inexact(n) = stats.num_rows {
- stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize);
- }
- if let Precision::Inexact(n) = stats.total_byte_size {
- stats.total_byte_size =
- Precision::Inexact((selectivity * n as f32) as usize);
- }
+ let selectivity = 0.2_f64;
+ let mut stats = input_stats.clone().into_inexact();
+ stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity);
+ stats.total_byte_size = stats
+ .total_byte_size
+ .with_estimated_selectivity(selectivity);
return Ok(stats);
}
@@ -222,14 +219,8 @@ impl ExecutionPlan for FilterExec {
// Estimate (inexact) selectivity of predicate
let selectivity = analysis_ctx.selectivity.unwrap_or(1.0);
- let num_rows = match num_rows.get_value() {
- Some(nr) => Precision::Inexact((*nr as f64 * selectivity).ceil() as usize),
- None => Precision::Absent,
- };
- let total_byte_size = match total_byte_size.get_value() {
- Some(tbs) => Precision::Inexact((*tbs as f64 * selectivity).ceil() as usize),
- None => Precision::Absent,
- };
+ let num_rows = num_rows.with_estimated_selectivity(selectivity);
+ let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity);
let column_statistics = collect_new_statistics(
&input_stats.column_statistics,