findepi commented on code in PR #11299: URL: https://github.com/apache/datafusion/pull/11299#discussion_r1667382661
########## datafusion/sqllogictest/test_files/aggregate.slt: ########## @@ -2744,28 +2735,98 @@ SELECT ARRAY_AGG([1]) ---- [[1]] -# test_approx_percentile_cont_decimal_support -query TI -SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 +# test array_agg with no row qualified +statement ok +create table t(a int, b float, c bigint) as values (1, 1.2, 2); + +query ? +select array_agg(a) from t where a > 2; ---- -a 4 -b 5 -c 4 -d 4 -e 4 +NULL +query ? +select array_agg(b) from t where b > 3.1; +---- +NULL -# array_agg_zero query ? -SELECT ARRAY_AGG([]); +select array_agg(c) from t where c > 3; ---- -[[]] +NULL -# array_agg_one +query ?I +select array_agg(c), count(1) from t where c > 3; +---- +NULL 0 + +# returns 0 rows if group by is applied query ? -SELECT ARRAY_AGG([1]); +select array_agg(a) from t where a > 3 group by a; ---- -[[1]] + +query ?I +select array_agg(a), count(1) from t where a > 3 group by a; +---- + +# TODO: Expect NULL, got empty list +query ? +select array_agg(distinct a) from t where a > 3; +---- +[] + +query ?I +select array_agg(distinct a), count(1) from t where a > 3; +---- +NULL 0 + +# returns 0 rows if group by is applied +query ? +select array_agg(distinct a) from t where a > 3 group by a; +---- + +query ?I +select array_agg(distinct a), count(1) from t where a > 3 group by a; +---- + +# test order sensitive array agg +query ? +select array_agg(a order by a) from t where a > 3; +---- +NULL + +query ? +select array_agg(a order by a) from t where a > 3 group by a; +---- + +query ?I +select array_agg(a order by a), count(1) from t where a > 3 group by a; +---- + +statement ok +drop table t; + +# test with no values Review Comment: add `array_agg(distinct` case on empty table ########## datafusion/sqllogictest/test_files/aggregate.slt: ########## @@ -1753,31 +1753,12 @@ NULL 4 29 1.260869565217 123 -117 23 NULL 5 -194 -13.857142857143 118 -101 14 NULL NULL 781 7.81 125 -117 100 -# TODO: array_agg_distinct output is non-deterministic -- rewrite with array_sort(list_sort) -# unnest is also not available, so manually unnesting via CROSS JOIN -# additional count(1) forces array_agg_distinct instead of array_agg over aggregated by c2 data -# +# select with count to forces array_agg_distinct function, since single distinct expression is converted to group by by optimizer # csv_query_array_agg_distinct -query III -WITH indices AS ( - SELECT 1 AS idx UNION ALL - SELECT 2 AS idx UNION ALL - SELECT 3 AS idx UNION ALL - SELECT 4 AS idx UNION ALL - SELECT 5 AS idx -) -SELECT data.arr[indices.idx] as element, array_length(data.arr) as array_len, dummy -FROM ( - SELECT array_agg(distinct c2) as arr, count(1) as dummy FROM aggregate_test_100 -) data - CROSS JOIN indices -ORDER BY 1 ----- -1 5 100 -2 5 100 -3 5 100 -4 5 100 -5 5 100 Review Comment: why removed? ########## datafusion/sqllogictest/test_files/aggregate.slt: ########## @@ -2744,28 +2735,84 @@ SELECT ARRAY_AGG([1]) ---- [[1]] -# test_approx_percentile_cont_decimal_support -query TI -SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 +# test array_agg with no row qualified +statement ok +create table t(a int, b float, c bigint) as values (1, 1.2, 2); + +query ? +select array_agg(a) from t where a > 2; ---- -a 4 -b 5 -c 4 -d 4 -e 4 +NULL +query ? +select array_agg(b) from t where b > 3.1; +---- +NULL -# array_agg_zero query ? -SELECT ARRAY_AGG([]); +select array_agg(c) from t where c > 3; ---- -[[]] +NULL -# array_agg_one +query ?I +select array_agg(c), count(1) from t where c > 3; +---- +NULL 0 + +# returns 0 rows if group by is applied query ? -SELECT ARRAY_AGG([1]); +select array_agg(a) from t where a > 3 group by a; ---- -[[1]] + +query ?I +select array_agg(a), count(1) from t where a > 3 group by a; +---- + +# TODO: Expect NULL, got empty list +query ? +select array_agg(distinct a) from t where a > 3; Review Comment: Thanks @jayzhan211 for calling this out! Given there are no rows with `a > 3`, the aggregation input relation `from t where a > 3` is empty, so this should produce same result as array_agg from empty table, ie `NULL`. currently it shows `[]` as a result. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
