This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b3f59a1c [VL][UT] Fix scalar-subquery-select.sql in spark35(#5425)
9b3f59a1c is described below

commit 9b3f59a1caeb591778178ba5093153157278ecec
Author: Joey <[email protected]>
AuthorDate: Wed Apr 17 21:15:57 2024 +0800

    [VL][UT] Fix scalar-subquery-select.sql in spark35(#5425)
---
 .../scalar-subquery/scalar-subquery-select.sql     | 126 +------------
 .../scalar-subquery/scalar-subquery-select.sql.out | 207 ++-------------------
 .../apache/spark/sql/GlutenSQLQueryTestSuite.scala |   1 -
 3 files changed, 25 insertions(+), 309 deletions(-)

diff --git 
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
 
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index 48d1594fa..741292d2c 100644
--- 
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++ 
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -241,123 +241,17 @@ SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = 
t2.c1) FROM t1;
 
 select (select f from (select false as f, max(c2) from t1 where t1.c1 = 
t1.c1)) from t2;
 
--- Set operations in correlation path
-
-CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0);
-CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3);
-CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7);
-
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2a = t0a)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a > t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2b <= t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0;
-
-SELECT t0a, (SELECT sum(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION DISTINCT
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0;
-
--- Tests for column aliasing
-SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
-  (SELECT t1c as t1a, t1a as t1b, t0a as t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t0a as t2b, t2c as t1a, t0b as t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0;
-
--- Test handling of COUNT bug
-SELECT t0a, (SELECT count(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION DISTINCT
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0;
-
--- Correlated references in project
-SELECT t0a, (SELECT sum(d) FROM
-  (SELECT t1a - t0a as d
-  FROM   t1
-  UNION ALL
-  SELECT t2a - t0a as d
-  FROM   t2)
-)
-FROM t0;
-
--- Correlated references in aggregate - unsupported
-SELECT t0a, (SELECT sum(d) FROM
-  (SELECT sum(t0a) as d
-  FROM   t1
-  UNION ALL
-  SELECT sum(t2a) + t0a as d
-  FROM   t2)
-)
-FROM t0;
+-- SPARK-43596: handle IsNull when rewriting the domain join
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false;
+WITH T AS (SELECT 1 AS a)
+SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
+FROM (SELECT null as col) as foo;
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true;
 
 -- SPARK-43760: the result of the subquery can be NULL.
-select *
-from
-(
+select * from (
  select t1.id c1, (
-                    select sum(c)
-                    from (
-                      select t2.id * t2.id c
-                      from range (1, 2) t2 where t1.id = t2.id
-                      group by t2.id
-                    )
-                   ) c2
- from range (1, 3) t1
-) t
+  select t2.id c from range (1, 2) t2
+  where t1.id = t2.id  ) c2
+ from range (1, 3) t1 ) t
 where t.c2 is not null;
diff --git 
a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
 
b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 088359d39..5c6f141d8 100644
--- 
a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ 
b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -576,214 +576,37 @@ false
 
 
 -- !query
-CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0)
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false
 -- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3)
--- !query schema
-struct<>
+struct<key:string,value:string>
 -- !query output
-
+spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline        false
 
 
 -- !query
-CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7)
+WITH T AS (SELECT 1 AS a)
+SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
+FROM (SELECT null as col) as foo
 -- !query schema
-struct<>
+struct<scalarsubquery(col, col):bigint>
 -- !query output
-
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1      8
-2      NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2a = t0a)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
--- !query output
-1      8
-2      7
-
-
--- !query
-SELECT t0a, (SELECT sum(c) FROM
-  (SELECT t1c as c
-  FROM   t1
-  WHERE  t1a > t0a
-  UNION ALL
-  SELECT t2c as c
-  FROM   t2
-  WHERE  t2b <= t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1      5
-2      NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1      8
-2      NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION DISTINCT
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1      8
-2      NULL
-
-
--- !query
-SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
-  (SELECT t1c as t1a, t1a as t1b, t0a as t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION ALL
-  SELECT t0a as t2b, t2c as t1a, t0b as t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a, t0a, t0b, t0b):bigint>
--- !query output
-1      32
-2      NULL
-
-
--- !query
-SELECT t0a, (SELECT count(t1c) FROM
-  (SELECT t1c
-  FROM   t1
-  WHERE  t1a = t0a
-  UNION DISTINCT
-  SELECT t2c
-  FROM   t2
-  WHERE  t2b = t0b)
-)
-FROM t0
--- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
--- !query output
-1      2
-2      0
+NULL
 
 
 -- !query
-SELECT t0a, (SELECT sum(d) FROM
-  (SELECT t1a - t0a as d
-  FROM   t1
-  UNION ALL
-  SELECT t2a - t0a as d
-  FROM   t2)
-)
-FROM t0
+set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true
 -- !query schema
-struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
+struct<key:string,value:string>
 -- !query output
-1      1
-2      -2
+spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline        true
 
 
 -- !query
-SELECT t0a, (SELECT sum(d) FROM
-  (SELECT sum(t0a) as d
-  FROM   t1
-  UNION ALL
-  SELECT sum(t2a) + t0a as d
-  FROM   t2)
-)
-FROM t0
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : 
"UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
-  "sqlState" : "0A000",
-  "messageParameters" : {
-    "sqlExprs" : "\"sum(t0a) AS d\""
-  },
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 36,
-    "stopIndex" : 67,
-    "fragment" : "SELECT sum(t0a) as d\n  FROM   t1"
-  } ]
-}
-
-
--- !query
-select *
-from
-(
+select * from (
  select t1.id c1, (
-                    select sum(c)
-                    from (
-                      select t2.id * t2.id c
-                      from range (1, 2) t2 where t1.id = t2.id
-                      group by t2.id
-                    )
-                   ) c2
- from range (1, 3) t1
-) t
+  select t2.id c from range (1, 2) t2
+  where t1.id = t2.id  ) c2
+ from range (1, 3) t1 ) t
 where t.c2 is not null
 -- !query schema
 struct<c1:bigint,c2:bigint>
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 9f96fa0b0..b1f3945bf 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -227,7 +227,6 @@ class GlutenSQLQueryTestSuite
     "window.sql", // Local window fixes are not added.
     // Disable for Spark 3.
     "group-by.sql",
-    "subquery/scalar-subquery/scalar-subquery-select.sql",
     "udf/udf-group-by.sql - Scala UDF"
   ) ++ otherIgnoreList ++ udafIgnoreList
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to