This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ec587936e881 [SPARK-51082][SQL][TESTS] Move all group by alias related
tests to group-by-alias.sql
ec587936e881 is described below
commit ec587936e8810282990a791df81c34d9f5abe8ee
Author: mihailoale-db <[email protected]>
AuthorDate: Wed Feb 5 14:12:49 2025 +0100
[SPARK-51082][SQL][TESTS] Move all group by alias related tests to
group-by-alias.sql
### What changes were proposed in this pull request?
Move all group by alias related tests to `group-by-alias.sql`.
### Why are the changes needed?
Better visibility of this specific feature tested.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49795 from mihailoale-db/refactorgroupbyalias.
Authored-by: mihailoale-db <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../analyzer-results/group-by-alias.sql.out | 119 +++++++++++++++++++++
.../sql-tests/analyzer-results/group-by.sql.out | 87 ---------------
.../analyzer-results/table-aliases.sql.out | 14 ---
.../resources/sql-tests/inputs/group-by-alias.sql | 21 ++++
.../test/resources/sql-tests/inputs/group-by.sql | 13 ---
.../resources/sql-tests/inputs/table-aliases.sql | 2 -
.../sql-tests/results/group-by-alias.sql.out | 116 ++++++++++++++++++++
.../resources/sql-tests/results/group-by.sql.out | 88 ---------------
.../sql-tests/results/table-aliases.sql.out | 9 --
9 files changed, 256 insertions(+), 213 deletions(-)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out
new file mode 100644
index 000000000000..f26c1d5a2c2b
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out
@@ -0,0 +1,119 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null,
null)
+AS testData(a, b)
+-- !query analysis
+CreateViewCommand `testData`, SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null,
null)
+AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true
+ +- Project [a#x, b#x]
+ +- SubqueryAlias testData
+ +- LocalRelation [a#x, b#x]
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query analysis
+Aggregate [a#x], [a#x AS k#x, count(b#x) AS count(b)#xL]
++- SubqueryAlias testdata
+ +- View (`testData`, [a#x, b#x])
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+ +- Project [a#x, b#x]
+ +- SubqueryAlias testData
+ +- LocalRelation [a#x, b#x]
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
+-- !query analysis
+Filter (k#x > 1)
++- Aggregate [a#x], [a#x AS k#x, count(b#x) AS count(b)#xL]
+ +- SubqueryAlias testdata
+ +- View (`testData`, [a#x, b#x])
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+ +- Project [a#x, b#x]
+ +- SubqueryAlias testData
+ +- LocalRelation [a#x, b#x]
+
+
+-- !query
+SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k
+-- !query analysis
+Aggregate [col1#x], [col1#x AS k#x, sum(col2#x) AS sum(col2)#xL]
++- SubqueryAlias t
+ +- Project [a#x AS col1#x, b#x AS col2#x]
+ +- SubqueryAlias testdata
+ +- View (`testData`, [a#x, b#x])
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+ +- Project [a#x, b#x]
+ +- SubqueryAlias testData
+ +- LocalRelation [a#x, b#x]
+
+
+-- !query
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`non_existing`",
+ "proposal" : "`a`, `b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 22,
+ "stopIndex" : 33,
+ "fragment" : "non_existing"
+ } ]
+}
+
+
+-- !query
+SELECT COUNT(b) AS k FROM testData GROUP BY k
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "GROUP_BY_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "sqlExpr" : "count(testdata.b)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "COUNT(b)"
+ } ]
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query analysis
+SetCommand (spark.sql.groupByAliases,Some(false))
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`k`",
+ "proposal" : "`a`, `b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 48,
+ "stopIndex" : 48,
+ "fragment" : "k"
+ } ]
+}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
index 304b382c7bbe..ebbe8c381c8a 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
@@ -202,72 +202,6 @@ Aggregate [a#x], [count(distinct b#x) AS count(DISTINCT
b)#xL, count(distinct b#
+- OneRowRelation
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k
--- !query analysis
-Aggregate [a#x], [a#x AS k#x, count(b#x) AS count(b)#xL]
-+- SubqueryAlias testdata
- +- View (`testData`, [a#x, b#x])
- +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
- +- Project [a#x, b#x]
- +- SubqueryAlias testData
- +- LocalRelation [a#x, b#x]
-
-
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
--- !query analysis
-Filter (k#x > 1)
-+- Aggregate [a#x], [a#x AS k#x, count(b#x) AS count(b)#xL]
- +- SubqueryAlias testdata
- +- View (`testData`, [a#x, b#x])
- +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
- +- Project [a#x, b#x]
- +- SubqueryAlias testData
- +- LocalRelation [a#x, b#x]
-
-
--- !query
-SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
--- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`non_existing`",
- "proposal" : "`a`, `b`"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 22,
- "stopIndex" : 33,
- "fragment" : "non_existing"
- } ]
-}
-
-
--- !query
-SELECT COUNT(b) AS k FROM testData GROUP BY k
--- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "GROUP_BY_AGGREGATE",
- "sqlState" : "42903",
- "messageParameters" : {
- "sqlExpr" : "count(testdata.b)"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 15,
- "fragment" : "COUNT(b)"
- } ]
-}
-
-
-- !query
CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM
VALUES
(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
@@ -299,27 +233,6 @@ set spark.sql.groupByAliases=false
SetCommand (spark.sql.groupByAliases,Some(false))
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k
--- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`k`",
- "proposal" : "`a`, `b`"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 48,
- "stopIndex" : 48,
- "fragment" : "k"
- } ]
-}
-
-
-- !query
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
-- !query analysis
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out
index b7eacd1b1b0e..a7ce04b9cf36 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out
@@ -38,20 +38,6 @@ Project [col1#x, col2#x]
+- LocalRelation [a#x, b#x]
--- !query
-SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k
--- !query analysis
-Aggregate [col1#x], [col1#x AS k#x, sum(col2#x) AS sum(col2)#xL]
-+- SubqueryAlias t
- +- Project [a#x AS col1#x, b#x AS col2#x]
- +- SubqueryAlias testdata
- +- View (`testData`, [a#x, b#x])
- +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
- +- Project [a#x, b#x]
- +- SubqueryAlias testData
- +- LocalRelation [a#x, b#x]
-
-
-- !query
SELECT * FROM testData AS t(col1, col2, col3)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql
b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql
new file mode 100644
index 000000000000..34d76d04dc5a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql
@@ -0,0 +1,21 @@
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null,
null)
+AS testData(a, b);
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;
+SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k;
+
+-- GROUP BY alias with invalid col in SELECT list
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k;
+
+-- Aggregate functions cannot be used in GROUP BY
+SELECT COUNT(b) AS k FROM testData GROUP BY k;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 0cc1f62b0583..c1b5e88c43f2 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -44,16 +44,6 @@ FROM testData;
-- Aggregate with foldable input and multiple distinct groups.
SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3
AS c) GROUP BY a;
--- Aliases in SELECT could be used in GROUP BY
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;
-
--- GROUP BY alias with invalid col in SELECT list
-SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k;
-
--- Aggregate functions cannot be used in GROUP BY
-SELECT COUNT(b) AS k FROM testData GROUP BY k;
-
-- Test data.
CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM
VALUES
(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
@@ -62,9 +52,6 @@ SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias
GROUP BY a;
-- turn off group by aliases
set spark.sql.groupByAliases=false;
--- Check analysis exceptions
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
-
-- Aggregate with empty input and non-empty GroupBy expressions.
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
index d2aef1f83863..914bd7a7cdd5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
@@ -6,8 +6,6 @@ SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1;
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2;
-SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k;
-
-- Aliasing the wrong number of columns in the FROM clause
SELECT * FROM testData AS t(col1, col2, col3);
diff --git
a/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out
b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out
new file mode 100644
index 000000000000..a2e2cbef66bf
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out
@@ -0,0 +1,116 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null,
null)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query schema
+struct<k:int,count(b):bigint>
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
+-- !query schema
+struct<k:int,count(b):bigint>
+-- !query output
+2 2
+3 2
+
+
+-- !query
+SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k
+-- !query schema
+struct<k:int,sum(col2):bigint>
+-- !query output
+1 3
+2 3
+3 3
+NULL 1
+
+
+-- !query
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`non_existing`",
+ "proposal" : "`a`, `b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 22,
+ "stopIndex" : 33,
+ "fragment" : "non_existing"
+ } ]
+}
+
+
+-- !query
+SELECT COUNT(b) AS k FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "GROUP_BY_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "sqlExpr" : "count(testdata.b)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "COUNT(b)"
+ } ]
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.groupByAliases false
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`k`",
+ "proposal" : "`a`, `b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 48,
+ "stopIndex" : 48,
+ "fragment" : "k"
+ } ]
+}
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 98ad1a0a5bba..8608dbef843f 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -173,71 +173,6 @@ struct<count(DISTINCT b):bigint,count(DISTINCT b,
c):bigint>
1 1
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k
--- !query schema
-struct<k:int,count(b):bigint>
--- !query output
-1 2
-2 2
-3 2
-NULL 1
-
-
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
--- !query schema
-struct<k:int,count(b):bigint>
--- !query output
-2 2
-3 2
-
-
--- !query
-SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`non_existing`",
- "proposal" : "`a`, `b`"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 22,
- "stopIndex" : 33,
- "fragment" : "non_existing"
- } ]
-}
-
-
--- !query
-SELECT COUNT(b) AS k FROM testData GROUP BY k
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "GROUP_BY_AGGREGATE",
- "sqlState" : "42903",
- "messageParameters" : {
- "sqlExpr" : "count(testdata.b)"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 15,
- "fragment" : "COUNT(b)"
- } ]
-}
-
-
-- !query
CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM
VALUES
(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
@@ -271,29 +206,6 @@ struct<key:string,value:string>
spark.sql.groupByAliases false
--- !query
-SELECT a AS k, COUNT(b) FROM testData GROUP BY k
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`k`",
- "proposal" : "`a`, `b`"
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 48,
- "stopIndex" : 48,
- "fragment" : "k"
- } ]
-}
-
-
-- !query
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
-- !query schema
diff --git
a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
index 1878452d1c20..ce9f0ecbd1b5 100644
--- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
@@ -24,15 +24,6 @@ struct<col1:int,col2:int>
2 1
--- !query
-SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k
--- !query schema
-struct<k:int,sum(col2):bigint>
--- !query output
-1 3
-2 1
-
-
-- !query
SELECT * FROM testData AS t(col1, col2, col3)
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]