alamb commented on code in PR #10493:
URL: https://github.com/apache/datafusion/pull/10493#discussion_r1599952048


##########
datafusion/sql/src/expr/function.rs:
##########
@@ -229,12 +229,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 )?;
                 let order_by = (!order_by.is_empty()).then_some(order_by);
                 let args = self.function_args_to_expr(args, schema, 
planner_context)?;
-                // TODO: Support filter and distinct for UDAFs

Review Comment:
   nice



##########
datafusion/core/src/physical_optimizer/aggregate_statistics.rs:
##########
@@ -137,43 +134,48 @@ fn take_optimizable(node: &dyn ExecutionPlan) -> 
Option<Arc<dyn ExecutionPlan>>
     None
 }
 
-/// If this agg_expr is a count that is exactly defined in the statistics, 
return it.
-fn take_optimizable_table_count(
+/// If this agg_expr is a count that can be exactly derived from the 
statistics, return it.
+fn take_optimizable_column_and_table_count(
     agg_expr: &dyn AggregateExpr,
     stats: &Statistics,
 ) -> Option<(ScalarValue, String)> {
-    if let (&Precision::Exact(num_rows), Some(casted_expr)) = (
-        &stats.num_rows,
-        agg_expr.as_any().downcast_ref::<expressions::Count>(),
-    ) {
-        // TODO implementing Eq on PhysicalExpr would help a lot here
-        if casted_expr.expressions().len() == 1 {
-            if let Some(lit_expr) = casted_expr.expressions()[0]
-                .as_any()
-                .downcast_ref::<expressions::Literal>()
-            {
-                if lit_expr.value() == &COUNT_STAR_EXPANSION {
-                    return Some((
-                        ScalarValue::Int64(Some(num_rows as i64)),
-                        casted_expr.name().to_owned(),
-                    ));
+    let col_stats = &stats.column_statistics;
+    if let Some(agg_expr) = 
agg_expr.as_any().downcast_ref::<AggregateFunctionExpr>() {
+        if agg_expr.fun().name() == "COUNT" && !agg_expr.is_distinct() {
+            if let Precision::Exact(num_rows) = stats.num_rows {
+                let exprs = agg_expr.expressions();
+                if exprs.len() == 1 {
+                    // TODO optimize with exprs other than Column
+                    if let Some(col_expr) =
+                        exprs[0].as_any().downcast_ref::<expressions::Column>()
+                    {
+                        let current_val = 
&col_stats[col_expr.index()].null_count;
+                        if let &Precision::Exact(val) = current_val {
+                            return Some((
+                                ScalarValue::Int64(Some((num_rows - val) as 
i64)),
+                                agg_expr.name().to_string(),
+                            ));
+                        }
+                    } else if let Some(lit_expr) =
+                        
exprs[0].as_any().downcast_ref::<expressions::Literal>()
+                    {
+                        if lit_expr.value() == &COUNT_STAR_EXPANSION {
+                            return Some((
+                                ScalarValue::Int64(Some(num_rows as i64)),
+                                agg_expr.name().to_string(),
+                            ));
+                        }
+                    }
                 }
             }
         }
     }
-    None
-}
-
-/// If this agg_expr is a count that can be exactly derived from the 
statistics, return it.
-fn take_optimizable_column_count(
-    agg_expr: &dyn AggregateExpr,
-    stats: &Statistics,
-) -> Option<(ScalarValue, String)> {
-    let col_stats = &stats.column_statistics;
-    if let (&Precision::Exact(num_rows), Some(casted_expr)) = (
+    // TODO: Remove this after revmoing Builtin Count
+    else if let (&Precision::Exact(num_rows), Some(casted_expr)) = (
         &stats.num_rows,
         agg_expr.as_any().downcast_ref::<expressions::Count>(),
     ) {
+        // TODO implementing Eq on PhysicalExpr would help a lot here

Review Comment:
   
https://docs.rs/datafusion/latest/datafusion/physical_expr/trait.PhysicalExpr.html#
 seems to imply it is possible to to compare PhysicalExprs (perhaps 
`expr.eq(other_expr.as_any())` 🤔 )
   
   I don't know `PhysicalExpr` doesn't implement `PartialEq` directly 
   
   ```rust
   pub trait PhysicalExpr: ... PartialEq<dyn Any> {`
   ```



##########
datafusion/optimizer/src/analyzer/count_wildcard_rule.rs:
##########
@@ -54,23 +56,37 @@ fn is_wildcard(expr: &Expr) -> bool {
 }
 
 fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool {
-    matches!(
-        &aggregate_function.func_def,
-        AggregateFunctionDefinition::BuiltIn(
-            datafusion_expr::aggregate_function::AggregateFunction::Count,
-        )
-    ) && aggregate_function.args.len() == 1
-        && is_wildcard(&aggregate_function.args[0])
+    match aggregate_function {
+        AggregateFunction {
+            func_def: AggregateFunctionDefinition::UDF(udf),
+            args,
+            ..
+        } if udf.name() == "COUNT" && args.len() == 1 && is_wildcard(&args[0]) 
=> true,
+        AggregateFunction {
+            func_def:
+                AggregateFunctionDefinition::BuiltIn(
+                    
datafusion_expr::aggregate_function::AggregateFunction::Count,
+                ),
+            args,
+            ..
+        } if args.len() == 1 && is_wildcard(&args[0]) => true,
+        _ => false,
+    }
 }
 
 fn is_count_star_window_aggregate(window_function: &WindowFunction) -> bool {
-    matches!(
-        &window_function.fun,
+    let args = &window_function.args;
+    match window_function.fun {
         WindowFunctionDefinition::AggregateFunction(
-            datafusion_expr::aggregate_function::AggregateFunction::Count,
-        )
-    ) && window_function.args.len() == 1
-        && is_wildcard(&window_function.args[0])
+            aggregate_function::AggregateFunction::Count,
+        ) if args.len() == 1 && is_wildcard(&args[0]) => true,
+        WindowFunctionDefinition::AggregateUDF(ref udaf)
+            if udaf.name() == "COUNT" && args.len() == 1 && 
is_wildcard(&args[0]) =>

Review Comment:
   I wonder if we should document the `COUNT` somewhere. Also should it do a 
case insensitive comparison?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to