neilconway commented on code in PR #22319:
URL: https://github.com/apache/datafusion/pull/22319#discussion_r3281965410
##########
datafusion/functions-aggregate/src/approx_distinct.rs:
##########
@@ -381,6 +381,14 @@ impl AggregateUDFImpl for ApproxDistinct {
Ok(DataType::UInt64)
}
+ fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
+ Ok(ScalarValue::UInt64(Some(0)))
+ }
+
+ fn is_nullable(&self) -> bool {
+ false
+ }
Review Comment:
We can do something similar for `regr_count` as well, I think?
##########
datafusion/optimizer/src/decorrelate.rs:
##########
@@ -513,15 +513,16 @@ fn agg_exprs_evaluation_result_on_empty_batch(
.clone()
.transform_up(|expr| {
let new_expr = match expr {
- Expr::AggregateFunction(expr::AggregateFunction { func, ..
}) => {
- if func.name() == "count" {
- Transformed::yes(Expr::Literal(
- ScalarValue::Int64(Some(0)),
- None,
- ))
- } else {
- Transformed::yes(Expr::Literal(ScalarValue::Null,
None))
- }
+ Expr::AggregateFunction(expr::AggregateFunction { func,
params }) => {
+ let fields = params
+ .args
+ .iter()
+ .map(|arg| arg.to_field(schema.as_ref()).map(|(_,
f)| f))
+ .collect::<Result<Vec<_>>>()?;
+ let return_field = func.return_field(&fields)?;
+ let default_value =
+ func.default_value(return_field.data_type())?;
+ Transformed::yes(Expr::Literal(default_value, None))
}
_ => Transformed::no(expr),
Review Comment:
Simpler:
```rust
let new_expr = if let Expr::AggregateFunction(agg) = &expr {
let return_type = expr.get_type(schema.as_ref())?;
let default_value = agg.func.default_value(&return_type)?;
Transformed::yes(Expr::Literal(default_value, None))
} else {
Transformed::no(expr)
};
```
##########
datafusion/functions-aggregate/src/regr.rs:
##########
@@ -457,6 +457,14 @@ impl AggregateUDFImpl for Regr {
}
}
+ fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
+ if self.regr_type == RegrType::Count {
+ Ok(ScalarValue::UInt64(Some(0)))
+ } else {
+ ScalarValue::try_from(data_type)
Review Comment:
`Ok(ScalarValue::Float64(None))` would be more clear, I think, and would
mirror `return_type`.
##########
datafusion/sqllogictest/test_files/subquery.slt:
##########
@@ -872,6 +872,88 @@ SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int =
t1.t1_int) as cnt from
33 3
44 0
+#correlated_scalar_subquery_non_count_agg_empty_defaults
+query III rowsort
+SELECT
+ t1_id,
+ (
+ SELECT regr_count(1.0, 1.0)
+ FROM t2
+ WHERE t2.t2_int = t1.t1_int
+ ) AS r,
+ (
+ SELECT approx_distinct(t2.t2_id)
+ FROM t2
+ WHERE t2.t2_int = t1.t1_int
+ ) AS d
+FROM t1
+----
+11 1 1
+22 0 0
+33 3 3
+44 0 0
+
+query III rowsort
+SELECT
+ t1_id,
+ (
+ SELECT regr_count(1.0, 1.0) AS rc
+ FROM t2
+ WHERE t2.t2_int = t1.t1_int
+ ) AS r,
+ (
+ SELECT approx_distinct(t2.t2_id) AS ad
+ FROM t2
+ WHERE t2.t2_int = t1.t1_int
+ ) AS d
+FROM t1
+----
+11 1 1
+22 0 0
+33 3 3
+44 0 0
Review Comment:
This test seems redundant with the first test query?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]