buraksenn commented on code in PR #21473:
URL: https://github.com/apache/datafusion/pull/21473#discussion_r3098809724


##########
datafusion/physical-plan/src/filter.rs:
##########
@@ -308,44 +309,68 @@ impl FilterExec {
         &self.projection
     }
 
-    /// Calculates `Statistics` for `FilterExec`, by applying selectivity (either default, or estimated) to input statistics.
+    /// Calculates `Statistics` for `FilterExec`, by applying selectivity
+    /// (either default, or estimated) to input statistics.
+    ///
+    /// Equality predicates (`col = literal`) set NDV to `Exact(1)`, or
+    /// `Exact(0)` when the predicate is contradictory (e.g. `a = 1 AND a = 2`).
     pub(crate) fn statistics_helper(
         schema: &SchemaRef,
         input_stats: Statistics,
         predicate: &Arc<dyn PhysicalExpr>,
         default_selectivity: u8,
     ) -> Result<Statistics> {
-        if !check_support(predicate, schema) {
-            let selectivity = default_selectivity as f64 / 100.0;
-            let mut stats = input_stats.to_inexact();
-            stats.num_rows = 
stats.num_rows.with_estimated_selectivity(selectivity);
-            stats.total_byte_size = stats
-                .total_byte_size
-                .with_estimated_selectivity(selectivity);
-            return Ok(stats);
-        }
+        let (eq_columns, is_infeasible) = collect_equality_columns(predicate);
 
         let num_rows = input_stats.num_rows;
         let total_byte_size = input_stats.total_byte_size;
-        let input_analysis_ctx =
-            AnalysisContext::try_from_statistics(schema, 
&input_stats.column_statistics)?;
 
-        let analysis_ctx = analyze(predicate, input_analysis_ctx, schema)?;
+        let (selectivity, mut column_statistics) = if is_infeasible {
+            (0.0, input_stats.to_inexact().column_statistics)
+        } else if !check_support(predicate, schema) {
+            (
+                default_selectivity as f64 / 100.0,
+                input_stats.to_inexact().column_statistics,
+            )
+        } else {
+            let input_analysis_ctx = AnalysisContext::try_from_statistics(
+                schema,
+                &input_stats.column_statistics,
+            )?;
+            let analysis_ctx = analyze(predicate, input_analysis_ctx, schema)?;
+            let selectivity = analysis_ctx.selectivity.unwrap_or(1.0);
+            let filtered_num_rows = 
num_rows.with_estimated_selectivity(selectivity);

Review Comment:
   Now I only compute this once, thanks.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to