This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a2b9ab82f8 Minor: add `with_estimated_selectivity ` to Precision (#8177)
a2b9ab82f8 is described below

commit a2b9ab82f8b9905e538f1b5ff27345998ab98fb9
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Nov 17 14:15:17 2023 -0500

    Minor: add `with_estimated_selectivity ` to Precision (#8177)
    
    * Minor: add apply_filter to Precision
    
    * fix: use inexact
    
    * Rename to with_estimated_selectivity
---
 datafusion/common/src/stats.rs         |  9 +++++++++
 datafusion/physical-plan/src/filter.rs | 25 ++++++++-----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index 1c7a4fd4d5..7ad8992ca9 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -151,6 +151,15 @@ impl Precision<usize> {
             (_, _) => Precision::Absent,
         }
     }
+
+    /// Return the estimate of applying a filter with estimated selectivity
+    /// `selectivity` to this Precision. A selectivity of `1.0` means that all
+    /// rows are selected. A selectivity of `0.5` means half the rows are
+    /// selected. Will always return inexact statistics.
+    pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
+        self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
+            .to_inexact()
+    }
 }
 
 impl Precision<ScalarValue> {
diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs
index 597e1d523a..107c95eff7 100644
--- a/datafusion/physical-plan/src/filter.rs
+++ b/datafusion/physical-plan/src/filter.rs
@@ -200,15 +200,12 @@ impl ExecutionPlan for FilterExec {
             // assume filter selects 20% of rows if we cannot do anything smarter
             // tracking issue for making this configurable:
             // https://github.com/apache/arrow-datafusion/issues/8133
-            let selectivity = 0.2_f32;
-            let mut stats = input_stats.into_inexact();
-            if let Precision::Inexact(n) = stats.num_rows {
-                stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize);
-            }
-            if let Precision::Inexact(n) = stats.total_byte_size {
-                stats.total_byte_size =
-                    Precision::Inexact((selectivity * n as f32) as usize);
-            }
+            let selectivity = 0.2_f64;
+            let mut stats = input_stats.clone().into_inexact();
+            stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity);
+            stats.total_byte_size = stats
+                .total_byte_size
+                .with_estimated_selectivity(selectivity);
             return Ok(stats);
         }
 
@@ -222,14 +219,8 @@ impl ExecutionPlan for FilterExec {
 
         // Estimate (inexact) selectivity of predicate
         let selectivity = analysis_ctx.selectivity.unwrap_or(1.0);
-        let num_rows = match num_rows.get_value() {
-            Some(nr) => Precision::Inexact((*nr as f64 * selectivity).ceil() as usize),
-            None => Precision::Absent,
-        };
-        let total_byte_size = match total_byte_size.get_value() {
-            Some(tbs) => Precision::Inexact((*tbs as f64 * selectivity).ceil() as usize),
-            None => Precision::Absent,
-        };
+        let num_rows = num_rows.with_estimated_selectivity(selectivity);
+        let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity);
 
         let column_statistics = collect_new_statistics(
             &input_stats.column_statistics,

Reply via email to