alamb commented on code in PR #7081:
URL: https://github.com/apache/arrow-datafusion/pull/7081#discussion_r1275361996
##########
datafusion/core/src/execution/context.rs:
##########
@@ -2307,11 +2307,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+--------------+--------------+-----------------+",
- "| SUM(test.c1) | SUM(test.c2) | COUNT(UInt8(1)) |",
- "+--------------+--------------+-----------------+",
- "| 10 | 110 | 20 |",
- "+--------------+--------------+-----------------+",
+ "+--------------+--------------+----------+",
Review Comment:
that is certainly much nicer ❤️
##########
datafusion/core/tests/sqllogictests/test_files/subquery.slt:
##########
@@ -846,65 +842,60 @@ query TT
explain SELECT t1_id, (SELECT count(*) + 2 as cnt_plus_2 FROM t2 WHERE
t2.t2_int = t1.t1_int having count(*) = 0) from t1
----
logical_plan
-Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN
Int64(2) AS cnt_plus_2 WHEN __scalar_sq_1.COUNT(UInt8(1)) != Int64(0) THEN NULL
ELSE __scalar_sq_1.cnt_plus_2 END AS cnt_plus_2
+Projection: t1.t1_id, __scalar_sq_1.cnt_plus_2 AS cnt_plus_2
--Left Join: t1.t1_int = __scalar_sq_1.t2_int
----TableScan: t1 projection=[t1_id, t1_int]
----SubqueryAlias: __scalar_sq_1
-------Projection: COUNT(UInt8(1)) + Int64(2) AS cnt_plus_2, t2.t2_int,
COUNT(UInt8(1)), __always_true
---------Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]],
aggr=[[COUNT(UInt8(1))]]
-----------TableScan: t2 projection=[t2_int]
+------Projection: COUNT(*) + Int64(2) AS cnt_plus_2, t2.t2_int
+--------Filter: COUNT(*) = Int64(0)
+----------Aggregate: groupBy=[[t2.t2_int]], aggr=[[COUNT(UInt8(1)) AS
COUNT(*)]]
+------------TableScan: t2 projection=[t2_int]
Review Comment:
If you are referring to `COUNT(*)` I think it should always be 0
##########
datafusion/core/src/execution/context.rs:
##########
@@ -2307,11 +2307,11 @@ mod tests {
assert_eq!(results.len(), 1);
let expected = vec![
- "+--------------+--------------+-----------------+",
- "| SUM(test.c1) | SUM(test.c2) | COUNT(UInt8(1)) |",
- "+--------------+--------------+-----------------+",
- "| 10 | 110 | 20 |",
- "+--------------+--------------+-----------------+",
+ "+--------------+--------------+----------+",
Review Comment:
that is certainly much nicer ❤️
##########
datafusion/core/tests/sqllogictests/test_files/subquery.slt:
##########
@@ -714,106 +714,103 @@ query TT
explain SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int)
from t1
----
logical_plan
-Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN
Int64(0) ELSE __scalar_sq_1.COUNT(UInt8(1)) END AS COUNT(UInt8(1))
+Projection: t1.t1_id, __scalar_sq_1.COUNT(*) AS COUNT(*)
--Left Join: t1.t1_int = __scalar_sq_1.t2_int
----TableScan: t1 projection=[t1_id, t1_int]
----SubqueryAlias: __scalar_sq_1
-------Projection: COUNT(UInt8(1)), t2.t2_int, __always_true
---------Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]],
aggr=[[COUNT(UInt8(1))]]
+------Projection: COUNT(*), t2.t2_int
+--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[COUNT(UInt8(1)) AS COUNT(*)]]
----------TableScan: t2 projection=[t2_int]
query II rowsort
SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int) from t1
----
11 1
-22 0
+22 NULL
Review Comment:
this seems wrong -- `COUNT(*)` should never produce `NULL` values even if it
doesn't see any non null inputs
The
```
CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE
__scalar_sq_1.COUNT(UInt8(1)) END AS COUNT(UInt8(1))
```
Appears to be doing that translation of NULL --> 0 but it has been lost in
this PR
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]