This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8446b6d3da36 [SPARK-54687][SQL] Add golden file with generators resolution edge cases
8446b6d3da36 is described below
commit 8446b6d3da3638522148780f4e3a667d47fc3f4e
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Fri Dec 12 09:48:16 2025 +0800
[SPARK-54687][SQL] Add golden file with generators resolution edge cases
### What changes were proposed in this pull request?
Just a new golden file covering some edge cases of generators' resolution.
### Why are the changes needed?
Generator resolution sometimes has surprising behavior, gaps, and bugs. It is worth covering these cases with tests for now.
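For example, one of the cases in the new golden file shows that two generators in a single SELECT list are resolved into stacked `Generate` operators, so the query returns the cross product of the generated rows rather than failing:

```sql
-- Taken from the new golden file: multiple generators in one projection work
-- and produce a cross product, while adding an aggregate such as count(*) to
-- the same projection fails with UNSUPPORTED_GENERATOR.MULTI_GENERATOR.
SELECT explode(array(0, 1, 2)), explode(array(10, 20));
```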
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
New tests
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor 2.2.14
Closes #53447 from mikhailnik-db/add-generators-tests.
Authored-by: Mikhail Nikoliukin <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../generators-resolution-edge-cases.sql.out | 462 +++++++++++++++++++++
.../inputs/generators-resolution-edge-cases.sql | 129 ++++++
.../generators-resolution-edge-cases.sql.out | 462 +++++++++++++++++++++
3 files changed, 1053 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
new file mode 100644
index 000000000000..2172fbe39a5b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -0,0 +1,462 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT explode(explode(array(array(1, 2, 3))))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"explode(explode(array(array(1, 2, 3))))\""
+ }
+}
+
+
+-- !query
+SELECT 1 + explode(array(1, 2, 3))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"(1 + explode(array(1, 2, 3)))\""
+ }
+}
+
+
+-- !query
+SELECT explode(array(0, 1, 2)), explode(array(10, 20))
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- Generate explode(array(0, 1, 2)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as double))), false, [col#x]
+ +- Generate explode(array(10, 20)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.MULTI_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "generators" : "\"explode(array(1, 2))\", \"explode(array(3, 4))\"",
+ "num" : "2"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b)
+-- !query analysis
+Project [a#x, pos#x, b#x]
++- Generate posexplode(array(x, y, z)), false, [pos#x, b#x]
+ +- Generate explode(array(1, 2)), false, [a#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val FROM range(5)
+-- !query analysis
+Project [val#xL]
++- Generate explode(_gen_input_0#x), false, [val#xL]
+ +- Aggregate [collect_list(id#xL, 0, 0) AS _gen_input_0#x]
+ +- Range (0, 5, step=1)
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3)
+-- !query analysis
+Project [val#xL, cnt#xL]
++- Generate explode(_gen_input_0#x), false, [val#xL]
+   +- Aggregate [collect_list(id#xL, 0, 0) AS _gen_input_0#x, count(1) AS cnt#xL]
+ +- Range (0, 3, step=1)
+
+
+-- !query
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`array_element`",
+ "proposal" : "`id`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 98,
+ "stopIndex" : 110,
+ "fragment" : "array_element"
+ } ]
+}
+
+
+-- !query
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3)
+-- !query analysis
+Project [id#xL, pos#x, val#x]
++- Generate posexplode(CASE WHEN (id#xL = cast(1 as bigint)) THEN array(a, b) ELSE cast(null as array<string>) END), true, [pos#x, val#x]
+ +- Range (0, 3, step=1)
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias cte
+: +- Project [col#x]
+: +- Generate explode(array(1, 2, 3)), false, [col#x]
+: +- OneRowRelation
++- Union false, false
+ :- Project [col#x]
+ : +- SubqueryAlias cte
+ : +- CTERelationRef xxxx, true, [col#x], false, false
+ +- Project [col#x]
+ +- SubqueryAlias cte
+ +- CTERelationRef xxxx, true, [col#x], false, false
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias cte
+: +- Project [col#x]
+: +- Generate explode(array(1, 2)), false, [col#x]
+: +- OneRowRelation
++- Project [col#x, col#x]
+ +- Join Inner, (col#x = col#x)
+ :- SubqueryAlias t1
+ : +- SubqueryAlias cte
+ : +- CTERelationRef xxxx, true, [col#x], false, false
+ +- SubqueryAlias t2
+ +- SubqueryAlias cte
+ +- CTERelationRef xxxx, true, [col#x], false, false
+
+
+-- !query
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1
+-- !query analysis
+Project [val#x]
++- Filter (val#x > 1)
+ +- SubqueryAlias t
+ +- Project [val#x]
+ +- Generate explode(array(1, 2, 3)), false, [val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5
+-- !query analysis
+Project [val#x]
++- Filter (val#x < 5)
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [val#x]
+ +- Filter (val#x > 1)
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [val#x]
+ +- Generate explode(array(1, 2, 3, 4, 5)), false, [val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL
+-- !query analysis
+Project [package#x]
++- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [arr#x], [arr#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS arr#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+-- !query analysis
+Project [val#x]
++- Generate explode(_gen_input_0#x), false, [val#x]
+ +- Aggregate [collect_list(a#x, 0, 0) AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS a#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL
+-- !query analysis
+Project [col0#xL]
++- Generate stack(2, _gen_input_1#xL, _gen_input_2#xL), false, [col0#xL]
+   +- Aggregate [(cast(id#x as bigint) + 10)], [(cast(id#x as bigint) + 10) AS _gen_input_1#xL, count(val#x) AS _gen_input_2#xL]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS val#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2
+-- !query analysis
+Project [id#x, col#x]
++- Generate explode(_gen_input_0#x), false, [col#x]
+ +- Aggregate [id#x, arr#x], [id#x, arr#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3
+-- !query analysis
+Project [x1#x, x2#xL, x3#x, id#x]
++- Generate stack(2, first, _gen_input_2#xL, _gen_input_3#x, second, 0, array(1, 2, 3)), false, [x1#x, x2#xL, x3#x]
+   +- Aggregate [arr#x, id#x], [count(1) AS _gen_input_2#xL, arr#x AS _gen_input_3#x, id#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+-- !query analysis
+Project [x1#x, x2#xL, x3#x, id#x]
++- Generate stack(2, first, _gen_input_2#xL, _gen_input_3#x, second, 0, array(1, 2, 3)), false, [x1#x, x2#xL, x3#x]
+   +- Aggregate [arr#x, id#x], [count(1) AS _gen_input_2#xL, arr#x AS _gen_input_3#x, id#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`_gen_input_3`",
+ "proposal" : "`t`.`id`, `x1`, `x2`, `x3`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 198,
+ "stopIndex" : 209,
+ "fragment" : "_gen_input_3"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1)
+-- !query analysis
+Project [x1#x]
++- Project [c1#x, x1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1)
+-- !query analysis
+Project [x1#x, x2#x]
++- Project [c1#x, x1#x, x2#x]
+ +- Generate explode(array(c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr)
+-- !query analysis
+Project [id#x, index#x, value#x, index#x, value#x]
++- Generate posexplode(arr#x), false, [index#x, value#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS id#x, col2#x AS arr#x]
+ +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col
+-- !query analysis
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*)
+-- !query analysis
+Project [arr#x, col#x, count(1)#xL]
++- Generate explode(_gen_input_0#x), false, [col#x]
+ +- Project [arr#x, arr#x AS _gen_input_0#x, count(1)#xL AS count(1)#xL]
+ +- Project [count(1)#xL, array(1, 2, 3) AS arr#x]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 62,
+ "stopIndex" : 64,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 40,
+ "stopIndex" : 42,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2
+-- !query analysis
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2
+-- !query analysis
+Project [col#x, (col#x + 1) AS col2#x]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ()
+-- !query analysis
+Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
++- Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+   +- Window [count(1) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+ +- Project [col#x]
+ +- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*)
+-- !query analysis
+Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1)#xL]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+   +- Project [count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1)#xL]
+      +- Project [count(1)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+         +- Window [count(1) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
new file mode 100644
index 000000000000..a59fae3f83e9
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -0,0 +1,129 @@
+-- nested generator should fail
+SELECT explode(explode(array(array(1, 2, 3))));
+
+-- non-top-level generator should fail
+SELECT 1 + explode(array(1, 2, 3));
+
+-- multiple generators should work
+SELECT explode(array(0, 1, 2)), explode(array(10, 20));
+
+-- multiple generators' order is not fixed and depends on rule ordering
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20));
+
+-- multiple generators in aggregate should fail
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*);
+
+-- multiple generators in project with multi-alias should work
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b);
+
+-- generator with aggregate function as argument should work
+SELECT explode(collect_list(id)) AS val FROM range(5);
+
+-- generator with aggregate function and other aggregates should work
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3);
+
+-- GROUP BY generator alias should fail
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element;
+
+-- posexplode_outer with null arrays via CASE WHEN should work
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3);
+
+-- generator in CTE with union
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte;
+
+-- generator in CTE with join
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col;
+
+-- generator with subquery
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1;
+
+-- nested subqueries with generators
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5;
+
+-- generator with GROUP BY ALL should respect the generator's arguments
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL;
+
+-- generator with GROUP BY ALL should skip generator arguments that contain an aggregate function
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL;
+
+-- generator with GROUP BY ALL should use only non-aggregate arguments for grouping
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL;
+
+-- generator with GROUP BY ordinal should count the generator's arguments
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2;
+
+-- generator's generated aliases should be visible for ordinals
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3;
+
+-- generator's generated aliases should be visible in GROUP BY.
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id;
+
+-- generator's generated aliases should not be visible in HAVING.
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0;
+
+-- generator should be able to reference columns from hidden output
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1);
+
+-- multiple generators should be able to reference columns from hidden output
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1);
+
+-- generator with multi-alias and column references should work
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr);
+
+-- generator with LCA in argument should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col;
+
+-- generator with LCA in argument + aggregate should work
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*);
+
+-- generator with LCA in argument + aggregate + LCA from generator output should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2;
+
+-- generator output LCA + aggregate should fail
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*);
+
+-- generator with LCA in argument + generator output LCA should fail
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2;
+
+-- generator output LCA should work
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2;
+
+-- generator with window function resolves in order Generator -> Window
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ();
+
+-- generator with window function and aggregate together resolves in order Aggregate -> Window -> Generator
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*);
diff --git a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
new file mode 100644
index 000000000000..995798ca246b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -0,0 +1,462 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT explode(explode(array(array(1, 2, 3))))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"explode(explode(array(array(1, 2, 3))))\""
+ }
+}
+
+
+-- !query
+SELECT 1 + explode(array(1, 2, 3))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expression" : "\"(1 + explode(array(1, 2, 3)))\""
+ }
+}
+
+
+-- !query
+SELECT explode(array(0, 1, 2)), explode(array(10, 20))
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+0 10
+0 20
+1 10
+1 20
+2 10
+2 20
+
+
+-- !query
+SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
+-- !query schema
+struct<col:double,col:int>
+-- !query output
+0.0 10
+0.0 20
+1.0 10
+1.0 20
+2.0 10
+2.0 20
+
+
+-- !query
+SELECT explode(array(1, 2)), explode(array(3, 4)), count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.MULTI_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "generators" : "\"explode(array(1, 2))\", \"explode(array(3, 4))\"",
+ "num" : "2"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2)) AS a, posexplode(array('x', 'y', 'z')) AS (pos, b)
+-- !query schema
+struct<a:int,pos:int,b:string>
+-- !query output
+1 0 x
+1 1 y
+1 2 z
+2 0 x
+2 1 y
+2 2 z
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val FROM range(5)
+-- !query schema
+struct<val:bigint>
+-- !query output
+0
+1
+2
+3
+4
+
+
+-- !query
+SELECT explode(collect_list(id)) AS val, count(*) AS cnt FROM range(3)
+-- !query schema
+struct<val:bigint,cnt:bigint>
+-- !query output
+0 3
+1 3
+2 3
+
+
+-- !query
+SELECT id, explode(array(1, 2, 3)) AS array_element
+FROM (VALUES (1), (2)) AS t(id)
+GROUP BY id, array_element
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`array_element`",
+ "proposal" : "`id`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 98,
+ "stopIndex" : 110,
+ "fragment" : "array_element"
+ } ]
+}
+
+
+-- !query
+SELECT id, posexplode_outer(CASE WHEN id = 1 THEN array('a', 'b') ELSE null END) AS (pos, val)
+FROM range(3)
+-- !query schema
+struct<id:bigint,pos:int,val:string>
+-- !query output
+0 NULL NULL
+1 0 a
+1 1 b
+2 NULL NULL
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2, 3)) AS col)
+SELECT * FROM cte
+UNION ALL
+SELECT * FROM cte
+-- !query schema
+struct<col:int>
+-- !query output
+1
+1
+2
+2
+3
+3
+
+
+-- !query
+WITH cte AS (SELECT explode(array(1, 2)) AS col)
+SELECT t1.col, t2.col FROM cte t1 JOIN cte t2 ON t1.col = t2.col
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+1 1
+2 2
+
+
+-- !query
+SELECT * FROM (SELECT explode(array(1, 2, 3)) AS val) t WHERE val > 1
+-- !query schema
+struct<val:int>
+-- !query output
+2
+3
+
+
+-- !query
+SELECT *
+FROM (SELECT * FROM (SELECT explode(array(1, 2, 3, 4, 5)) AS val) WHERE val > 1)
+WHERE val < 5
+-- !query schema
+struct<val:int>
+-- !query output
+2
+3
+4
+
+
+-- !query
+SELECT explode(arr) AS package
+FROM (VALUES(array('a', 'b'))) AS t(arr)
+GROUP BY ALL
+-- !query schema
+struct<package:string>
+-- !query output
+a
+b
+
+
+-- !query
+SELECT explode(collect_list(a)) AS val
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+-- !query schema
+struct<val:string>
+-- !query output
+a
+b
+
+
+-- !query
+SELECT stack(2, id + 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+GROUP BY ALL
+-- !query schema
+struct<col0:bigint>
+-- !query output
+1
+11
+12
+2
+
+
+-- !query
+SELECT id, explode(arr)
+FROM (VALUES(42, array('a', 'b', 'c'))) AS t(id, arr)
+GROUP BY id, 2
+-- !query schema
+struct<id:int,col:string>
+-- !query output
+42 a
+42 b
+42 c
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY 2, 3
+-- !query schema
+struct<x1:string,x2:bigint,x3:array<string>,id:int>
+-- !query output
+first 1 ["a","b"] 1
+second 0 ["1","2","3"] 1
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+-- !query schema
+struct<x1:string,x2:bigint,x3:array<string>,id:int>
+-- !query output
+first 1 ["a","b"] 1
+second 0 ["1","2","3"] 1
+
+
+-- !query
+SELECT stack(2, 'first', count(*), arr,
+ 'second', 0L, array('1', '2', '3')) AS (x1, x2, x3), id
+FROM (VALUES (1, array('a', 'b'))) AS t(id, arr)
+GROUP BY _gen_input_3, id
+HAVING size (_gen_input_3) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`_gen_input_3`",
+ "proposal" : "`t`.`id`, `x1`, `x2`, `x3`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 198,
+ "stopIndex" : 209,
+ "fragment" : "_gen_input_3"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1
+FROM (VALUES (1), (2)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+ USING (c1)
+-- !query schema
+struct<x1:int>
+-- !query output
+1
+2
+2
+3
+NULL
+NULL
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+ FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+ USING (c1)
+-- !query schema
+struct<x1:int,x2:int>
+-- !query output
+1 1
+2 2
+2 2
+3 3
+3 3
+4 NULL
+NULL 1
+NULL NULL
+
+
+-- !query
+SELECT id, posexplode(arr) AS (index, value), index, value
+FROM (VALUES (42, array('a', 'b', 'c')), (42, array('t'))) AS t(id, arr)
+-- !query schema
+struct<id:int,index:int,value:string,index:int,value:string>
+-- !query output
+42 0 a 0 a
+42 0 t 0 t
+42 1 b 1 b
+42 2 c 2 c
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*)
+-- !query schema
+struct<arr:array<int>,col:int,count(1):bigint>
+-- !query output
+[1,2,3] 1 1
+[1,2,3] 2 1
+[1,2,3] 3 1
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, count(*), col + 1 as col2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 62,
+ "stopIndex" : 64,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`col`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 40,
+ "stopIndex" : 42,
+ "fragment" : "col"
+ } ]
+}
+
+
+-- !query
+SELECT array(1, 2, 3) as arr, explode(arr) as col, col + 1 as col2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+ "errorClass" : "INTERNAL_ERROR",
+ "sqlState" : "XX000",
+ "messageParameters" : {
+    "message" : "Resolved plan should not contain any LateralColumnAliasReference.\nDebugging information: plan:\nGenerate explode(lateralAliasReference(arr)), false, [col#x]\n+- OneRowRelation\n"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 39,
+ "stopIndex" : 41,
+ "fragment" : "arr"
+ } ]
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, col + 1 as col2
+-- !query schema
+struct<col:int,col2:int>
+-- !query output
+1 2
+2 3
+3 4
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as col, count(*) OVER ()
+-- !query schema
+struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint>
+-- !query output
+1 3
+2 3
+3 3
+
+
+-- !query
+SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*)
+-- !query schema
+struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,count(1):bigint>
+-- !query output
+1 1 1
+2 1 1
+3 1 1