Tanel Kiis created SPARK-34876:
----------------------------------
Summary: Non-nullable aggregates can return NULL in a correlated
subquery
Key: SPARK-34876
URL: https://issues.apache.org/jira/browse/SPARK-34876
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 3.2.0
Reporter: Tanel Kiis
{code:title=query}
SELECT t1a,
(SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2,
(SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2,
(SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a)
approx_count_distinct_t2,
(SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2,
(SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2,
(SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a)
collect_set_t2
FROM t1;
{code}
{code:title=Result}
val1a 0 0 NULL NULL NULL NULL
val1a 0 0 NULL NULL NULL NULL
val1a 0 0 NULL NULL NULL NULL
val1a 0 0 NULL NULL NULL NULL
val1b 6 6 3 [19,119,319,19,19,19] [19,119,319]
0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001
val1c 2 2 2 [219,19] [219,19]
0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001
val1d 0 0 NULL NULL NULL NULL
val1d 0 0 NULL NULL NULL NULL
val1d 0 0 NULL NULL NULL NULL
val1e 1 1 1 [19] [19]
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e 1 1 1 [19] [19]
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e 1 1 1 [19] [19]
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
{code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]