This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8446b6d3da36 [SPARK-54687][SQL] Add golden file with generators resolution edge cases
8446b6d3da36 is described below
commit 8446b6d3da3638522148780f4e3a667d47fc3f4e
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Fri Dec 12 09:48:16 2025 +0800
[SPARK-54687][SQL] Add golden file with generators resolution edge cases
### What changes were proposed in this pull request?
Just a new golden file covering some edge cases of generators' resolution.
### Why are the changes needed?
Generator resolution sometimes has surprising behavior, gaps, and bugs. It is worth covering these cases with tests for now.
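For example, one of the cases in the new golden file shows that two generators in a single SELECT list are resolved into stacked `Generate` operators, so the query returns the cross product of the generated rows rather than failing:

```sql
-- Taken from the new golden file: multiple generators in one projection work
-- and produce a cross product, while adding an aggregate such as count(*) to
-- the same projection fails with UNSUPPORTED_GENERATOR.MULTI_GENERATOR.
SELECT explode(array(0, 1, 2)), explode(array(10, 20));
```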
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
New tests
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor 2.2.14
Closes #53447 from mikhailnik-db/add-generators-tests.
Authored-by: Mikhail Nikoliukin <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../generators-resolution-edge-cases.sql.out | 462 +++++++++++++++++++++
.../inputs/generators-resolution-edge-cases.sql | 129 ++++++
.../generators-resolution-edge-cases.sql.out | 462 +++++++++++++++++++++
3 files changed, 1053 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
new file mode 100644
index 000000000000..2172fbe39a5b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -0,0 +1,462 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT explode(explode(array(array(1, 2, 3))))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"explode(explode(array(array(1, 2, 3))))\""
+ }
+}
+
+
+-- !query
+SELECT 1 + explode(array(1, 2, 3))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"(1 + explode(array(1, 2, 3)))\""
+ }
+}
+
+
+-- !query
+SELECT explode(array(0, 1, 2)), explode(array(10, 20))
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- Generate explode(array(0, 1, 2)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as double))), false, [col#x]
+ +- Generate explode(array(10, 20)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.MULTI_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "generators" : "\"explode(array(1, 2))\", \"explode(array(3, 4))\"",
+ "num" : "2"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b)
+-- !query analysis
+Project [a#x, pos#x, b#x]
++- Generate posexplode(array(x, y, z)), false, [pos#x, b#x]
+ +- Generate explode(array(1, 2)), false, [a#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val FROM range(5)
+-- !query analysis
+Project [val#xL]
++- Generate explode(_gen_input_0#x), false, [val#xL]
+ +- Aggregate [collect_list(id#xL, 0, 0) AS _gen_input_0#x]
+ +- Range (0, 5, step=1)
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3)
+-- !query analysis
+Project [val#xL, cnt#xL]
++- Generate explode(_gen_input_0#x), false, [val#xL]
+   +- Aggregate [collect_list(id#xL, 0, 0) AS _gen_input_0#x, count(1) AS cnt#xL]
+ +- Range (0, 3, step=1)
+
+
+-- !query
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`array_element`",
+ "proposal" : "`id`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 98,
+ "stopIndex" : 110,
+ "fragment" : "array_element"
+ } ]
+}
+
+
+-- !query
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3)
+-- !query analysis
+Project [id#xL, pos#x, val#x]
++- Generate posexplode(CASE WHEN (id#xL = cast(1 as bigint)) THEN array(a, b) ELSE cast(null as array<string>) END), true, [pos#x, val#x]
+ +- Range (0, 3, step=1)
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias cte
+: +- Project [col#x]
+: +- Generate explode(array(1, 2, 3)), false, [col#x]
+: +- OneRowRelation
++- Union false, false
+ :- Project [col#x]
+ : +- SubqueryAlias cte
+ : +- CTERelationRef xxxx, true, [col#x], false, false
+ +- Project [col#x]
+ +- SubqueryAlias cte
+ +- CTERelationRef xxxx, true, [col#x], false, false
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias cte
+: +- Project [col#x]
+: +- Generate explode(array(1, 2)), false, [col#x]
+: +- OneRowRelation
++- Project [col#x, col#x]
+ +- Join Inner, (col#x = col#x)
+ :- SubqueryAlias t1
+ : +- SubqueryAlias cte
+ : +- CTERelationRef xxxx, true, [col#x], false, false
+ +- SubqueryAlias t2
+ +- SubqueryAlias cte
+ +- CTERelationRef xxxx, true, [col#x], false, false
+
+
+-- !query
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1
+-- !query analysis
+Project [val#x]
++- Filter (val#x > 1)
+ +- SubqueryAlias t
+ +- Project [val#x]
+ +- Generate explode(array(1, 2, 3)), false, [val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5
+-- !query analysis
+Project [val#x]
++- Filter (val#x < 5)
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [val#x]
+ +- Filter (val#x > 1)
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [val#x]
+ +- Generate explode(array(1, 2, 3, 4, 5)), false, [val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL
+-- !query analysis
+Project [package#x]
++- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [arr#x], [arr#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS arr#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+-- !query analysis
+Project [val#x]
++- Generate explode(_gen_input_0#x), false, [val#x]
+ +- Aggregate [collect_list(a#x, 0, 0) AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS a#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL
+-- !query analysis
+Project [col0#xL]
++- Generate stack(2, _gen_input_1#xL, _gen_input_2#xL), false, [col0#xL]
+   +- Aggregate [(cast(id#x as bigint) + 10)], [(cast(id#x as bigint) + 10) AS _gen_input_1#xL, count(val#x) AS _gen_input_2#xL]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS val#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2
+-- !query analysis
+Project [id#x, col#x]
++- Generate explode(_gen_input_0#x), false, [col#x]
+ +- Aggregate [id#x, arr#x], [id#x, arr#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3
+-- !query analysis
+Project [x1#x, x2#xL, x3#x, id#x]
++- Generate stack(2, first, _gen_input_2#xL, _gen_input_3#x, second, 0, array(1, 2, 3)), false, [x1#x, x2#xL, x3#x]
+   +- Aggregate [arr#x, id#x], [count(1) AS _gen_input_2#xL, arr#x AS _gen_input_3#x, id#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+-- !query analysis
+Project [x1#x, x2#xL, x3#x, id#x]
++- Generate stack(2, first, _gen_input_2#xL, _gen_input_3#x, second, 0, array(1, 2, 3)), false, [x1#x, x2#xL, x3#x]
+   +- Aggregate [arr#x, id#x], [count(1) AS _gen_input_2#xL, arr#x AS _gen_input_3#x, id#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`_gen_input_3`",
+ "proposal" : "`t`.`id`, `x1`, `x2`, `x3`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 198,
+ "stopIndex" : 209,
+ "fragment" : "_gen_input_3"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1)
+-- !query analysis
+Project [x1#x]
++- Project [c1#x, x1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1)
+-- !query analysis
+Project [x1#x, x2#x]
++- Project [c1#x, x1#x, x2#x]
+ +- Generate explode(array(c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr)
+-- !query analysis
+Project [id#x, index#x, value#x, index#x, value#x]
++- Generate posexplode(arr#x), false, [index#x, value#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col
+-- !query analysis
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*)
+-- !query analysis
+Project [arr#x, col#x, count(1)#xL]
++- Generate explode(_gen_input_0#x), false, [col#x]
+ +- Project [arr#x, arr#x AS _gen_input_0#x, count(1)#xL AS count(1)#xL]
+ +- Project [count(1)#xL, array(1, 2, 3) AS arr#x]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 62,
+ "stopIndex" : 64,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 40,
+ "stopIndex" : 42,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2
+-- !query analysis
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2
+-- !query analysis
+Project [col#x, (col#x + 1) AS col2#x]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ()
+-- !query analysis
+Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
++- Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+   +- Window [count(1) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+ +- Project [col#x]
+ +- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*)
+-- !query analysis
+Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1)#xL]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+   +- Project [count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1)#xL]
+      +- Project [count(1)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+         +- Window [count(1) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
new file mode 100644
index 000000000000..a59fae3f83e9
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -0,0 +1,129 @@
+-- nested generator should fail
+SELECT explode(explode(array(array(1, 2, 3))));
+
+-- non-top-level generator should fail
+SELECT 1 + explode(array(1, 2, 3));
+
+-- multiple generators should work
+SELECT explode(array(0, 1, 2)), explode(array(10, 20));
+
+-- multiple generators' order is not fixed and depends on rule ordering
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20));
+
+-- multiple generators in aggregate should fail
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*);
+
+-- multiple generators in project with multi-alias should work
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b);
+
+-- generator with aggregate function as argument should work
+SELECT explode(collect_list(id)) AS val FROM range(5);
+
+-- generator with aggregate function and other aggregates should work
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3);
+
+-- GROUP BY generator alias should fail
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element;
+
+-- posexplode_outer with null arrays via CASE WHEN should work
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3);
+
+-- generator in CTE with union
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte;
+
+-- generator in CTE with join
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col;
+
+-- generator with subquery
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1;
+
+-- nested subqueries with generators
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5;
+
+-- generator with GROUP BY ALL should respect the generator's arguments
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL;
+
+-- generator with GROUP BY ALL should skip generator arguments that contain an aggregate function
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL;
+
+-- generator with GROUP BY ALL should use only non-aggregate arguments for grouping
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL;
+
+-- generator with GROUP BY ordinal should count the generator's arguments
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2;
+
+-- generator's generated aliases should be visible for ordinals
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3;
+
+-- generator's generated aliases should be visible in GROUP BY.
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id;
+
+-- generator's generated aliases should not be visible in HAVING.
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0;
+
+-- generator should be able to reference columns from hidden output
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1);
+
+-- multiple generators should be able to reference columns from hidden output
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1);
+
+-- generator with multi-alias and column references should work
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr);
+
+-- generator with LCA in argument should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col;
+
+-- generator with LCA in argument + aggregate should work
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*);
+
+-- generator with LCA in argument + aggregate + LCA from generator output should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2;
+
+-- generator output LCA + aggregate should fail
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*);
+
+-- generator with LCA in argument + generator output LCA should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2;
+
+-- generator output LCA should work
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2;
+
+-- generator with window function resolves in order Generator -> Window
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ();
+
+-- generator with window function and aggregate together resolves in order Aggregate -> Window -> Generator
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*);
diff --git a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
new file mode 100644
index 000000000000..995798ca246b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -0,0 +1,462 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT explode(explode(array(array(1, 2, 3))))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"explode(explode(array(array(1, 2, 3))))\""
+ }
+}
+
+
+-- !query
+SELECT 1 + explode(array(1, 2, 3))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"(1 + explode(array(1, 2, 3)))\""
+ }
+}
+
+
+-- !query
+SELECT explode(array(0, 1, 2)), explode(array(10, 20))
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+0 10
+0 20
+1 10
+1 20
+2 10
+2 20
+
+
+-- !query
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
+-- !query schema
+struct<col:double,col:int>
+-- !query output
+0.0 10
+0.0 20
+1.0 10
+1.0 20
+2.0 10
+2.0 20
+
+
+-- !query
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.MULTI_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "generators" : "\"explode(array(1, 2))\", \"explode(array(3, 4))\"",
+ "num" : "2"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b)
+-- !query schema
+struct<a:int,pos:int,b:string>
+-- !query output
+1 0 x
+1 1 y
+1 2 z
+2 0 x
+2 1 y
+2 2 z
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val FROM range(5)
+-- !query schema
+struct<val:bigint>
+-- !query output
+0
+1
+2
+3
+4
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3)
+-- !query schema
+struct<val:bigint,cnt:bigint>
+-- !query output
+0 3
+1 3
+2 3
+
+
+-- !query
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`array_element`",
+ "proposal" : "`id`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 98,
+ "stopIndex" : 110,
+ "fragment" : "array_element"
+ } ]
+}
+
+
+-- !query
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3)
+-- !query schema
+struct<id:bigint,pos:int,val:string>
+-- !query output
+0 NULL NULL
+1 0 a
+1 1 b
+2 NULL NULL
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte
+-- !query schema
+struct<col:int>
+-- !query output
+1
+1
+2
+2
+3
+3
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+1 1
+2 2
+
+
+-- !query
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1
+-- !query schema
+struct<val:int>
+-- !query output
+2
+3
+
+
+-- !query
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5
+-- !query schema
+struct<val:int>
+-- !query output
+2
+3
+4
+
+
+-- !query
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL
+-- !query schema
+struct<package:string>
+-- !query output
+a
+b
+
+
+-- !query
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+-- !query schema
+struct<val:string>
+-- !query output
+a
+b
+
+
+-- !query
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL
+-- !query schema
+struct<col0:bigint>
+-- !query output
+1
+11
+12
+2
+
+
+-- !query
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2
+-- !query schema
+struct<id:int,col:string>
+-- !query output
+42 a
+42 b
+42 c
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3
+-- !query schema
+struct<x1:string,x2:bigint,x3:array<string>,id:int>
+-- !query output
+first 1 ["a","b"] 1
+second 0 ["1","2","3"] 1
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+-- !query schema
+struct<x1:string,x2:bigint,x3:array<string>,id:int>
+-- !query output
+first 1 ["a","b"] 1
+second 0 ["1","2","3"] 1
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`_gen_input_3`",
+ "proposal" : "`t`.`id`, `x1`, `x2`, `x3`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 198,
+ "stopIndex" : 209,
+ "fragment" : "_gen_input_3"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1)
+-- !query schema
+struct<x1:int>
+-- !query output
+1
+2
+2
+3
+NULL
+NULL
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1)
+-- !query schema
+struct<x1:int,x2:int>
+-- !query output
+1 1
+2 2
+2 2
+3 3
+3 3
+4 NULL
+NULL 1
+NULL NULL
+
+
+-- !query
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr)
+-- !query schema
+struct<id:int,index:int,value:string,index:int,value:string>
+-- !query output
+42 0 a 0 a
+42 0 t 0 t
+42 1 b 1 b
+42 2 c 2 c
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*)
+-- !query schema
+struct<arr:array<int>,col:int,count(1):bigint>
+-- !query output
+[1,2,3] 1 1
+[1,2,3] 2 1
+[1,2,3] 3 1
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 62,
+ "stopIndex" : 64,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 40,
+ "stopIndex" : 42,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2
+-- !query schema
+struct<col:int,col2:int>
+-- !query output
+1 2
+2 3
+3 4
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ()
+-- !query schema
+struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
+-- !query output
+1 3
+2 3
+3 3
+
+
+-- !query
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*)
+-- !query schema
+struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,count(1):bigint>
+-- !query output
+1 1 1
+2 1 1
+3 1 1