This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9b3f59a1c [VL][UT] Fix scalar-subquery-select.sql in spark35(#5425)
9b3f59a1c is described below
commit 9b3f59a1caeb591778178ba5093153157278ecec
Author: Joey <[email protected]>
AuthorDate: Wed Apr 17 21:15:57 2024 +0800
[VL][UT] Fix scalar-subquery-select.sql in spark35(#5425)
---
.../scalar-subquery/scalar-subquery-select.sql | 126 +------------
.../scalar-subquery/scalar-subquery-select.sql.out | 207 ++-------------------
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 1 -
3 files changed, 25 insertions(+), 309 deletions(-)
diff --git
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index 48d1594fa..741292d2c 100644
---
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -241,123 +241,17 @@ SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 =
t2.c1) FROM t1;
select (select f from (select false as f, max(c2) from t1 where t1.c1 =
t1.c1)) from t2;
--- Set operations in correlation path
-
-CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0);
-CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3);
-CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7);
-
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2a = t0a)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a > t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2b <= t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION DISTINCT
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0;
-
--- Tests for column aliasing
-SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
- (SELECT t1c as t1a, t1a as t1b, t0a as t1c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t0a as t2b, t2c as t1a, t0b as t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0;
-
--- Test handling of COUNT bug
-SELECT t0a, (SELECT count(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION DISTINCT
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0;
-
--- Correlated references in project
-SELECT t0a, (SELECT sum(d) FROM
- (SELECT t1a - t0a as d
- FROM t1
- UNION ALL
- SELECT t2a - t0a as d
- FROM t2)
-)
-FROM t0;
-
--- Correlated references in aggregate - unsupported
-SELECT t0a, (SELECT sum(d) FROM
- (SELECT sum(t0a) as d
- FROM t1
- UNION ALL
- SELECT sum(t2a) + t0a as d
- FROM t2)
-)
-FROM t0;
+-- SPARK-43596: handle IsNull when rewriting the domain join
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false;
+WITH T AS (SELECT 1 AS a)
+SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
+FROM (SELECT null as col) as foo;
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true;
-- SPARK-43760: the result of the subquery can be NULL.
-select *
-from
-(
+select * from (
select t1.id c1, (
- select sum(c)
- from (
- select t2.id * t2.id c
- from range (1, 2) t2 where t1.id = t2.id
- group by t2.id
- )
- ) c2
- from range (1, 3) t1
-) t
+ select t2.id c from range (1, 2) t2
+ where t1.id = t2.id ) c2
+ from range (1, 3) t1 ) t
where t.c2 is not null;
diff --git
a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 088359d39..5c6f141d8 100644
---
a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++
b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -576,214 +576,37 @@ false
-- !query
-CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0)
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false
-- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3)
--- !query schema
-struct<>
+struct<key:string,value:string>
-- !query output
-
+spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline false
-- !query
-CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7)
+WITH T AS (SELECT 1 AS a)
+SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
+FROM (SELECT null as col) as foo
-- !query schema
-struct<>
+struct<scalarsubquery(col, col):bigint>
-- !query output
-
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1 8
-2 NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2a = t0a)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
--- !query output
-1 8
-2 7
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
- (SELECT t1c as c
- FROM t1
- WHERE t1a > t0a
- UNION ALL
- SELECT t2c as c
- FROM t2
- WHERE t2b <= t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1 5
-2 NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1 8
-2 NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION DISTINCT
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1 8
-2 NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
- (SELECT t1c as t1a, t1a as t1b, t0a as t1c
- FROM t1
- WHERE t1a = t0a
- UNION ALL
- SELECT t0a as t2b, t2c as t1a, t0b as t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a, t0a, t0b, t0b):bigint>
--- !query output
-1 32
-2 NULL
-
-
--- !query
-SELECT t0a, (SELECT count(t1c) FROM
- (SELECT t1c
- FROM t1
- WHERE t1a = t0a
- UNION DISTINCT
- SELECT t2c
- FROM t2
- WHERE t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1 2
-2 0
+NULL
-- !query
-SELECT t0a, (SELECT sum(d) FROM
- (SELECT t1a - t0a as d
- FROM t1
- UNION ALL
- SELECT t2a - t0a as d
- FROM t2)
-)
-FROM t0
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true
-- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
+struct<key:string,value:string>
-- !query output
-1 1
-2 -2
+spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline true
-- !query
-SELECT t0a, (SELECT sum(d) FROM
- (SELECT sum(t0a) as d
- FROM t1
- UNION ALL
- SELECT sum(t2a) + t0a as d
- FROM t2)
-)
-FROM t0
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-{
- "errorClass" :
"UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
- "sqlState" : "0A000",
- "messageParameters" : {
- "sqlExprs" : "\"sum(t0a) AS d\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 36,
- "stopIndex" : 67,
- "fragment" : "SELECT sum(t0a) as d\n FROM t1"
- } ]
-}
-
-
--- !query
-select *
-from
-(
+select * from (
select t1.id c1, (
- select sum(c)
- from (
- select t2.id * t2.id c
- from range (1, 2) t2 where t1.id = t2.id
- group by t2.id
- )
- ) c2
- from range (1, 3) t1
-) t
+ select t2.id c from range (1, 2) t2
+ where t1.id = t2.id ) c2
+ from range (1, 3) t1 ) t
where t.c2 is not null
-- !query schema
struct<c1:bigint,c2:bigint>
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 9f96fa0b0..b1f3945bf 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -227,7 +227,6 @@ class GlutenSQLQueryTestSuite
"window.sql", // Local window fixes are not added.
// Disable for Spark 3.
"group-by.sql",
- "subquery/scalar-subquery/scalar-subquery-select.sql",
"udf/udf-group-by.sql - Scala UDF"
) ++ otherIgnoreList ++ udafIgnoreList
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]