asfgit closed pull request #23213: [SPARK-26262][SQL] Runs SQLQueryTestSuite on
mixed config sets: WHOLESTAGE_CODEGEN_ENABLED and CODEGEN_FACTORY_MODE
URL: https://github.com/apache/spark/pull/23213
This is a pull request merged from a forked repository. Because GitHub does not
display the original diff of a foreign (fork-sourced) pull request after merge,
the diff is reproduced below for the sake of provenance:
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index ec263ea70bd4a..7e81ff1aba37b 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -141,8 +141,3 @@ SELECT every("true");
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
-
--- simple explain of queries having every/some/any agregates. Optimized
--- plan should show the rewritten aggregate expression.
-EXPLAIN EXTENDED SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k;
-
diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
index 41d316444ed6b..b3ec956cd178e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
@@ -49,6 +49,3 @@ select * from values ("one", count(1)), ("two", 2) as data(a,
b);
-- string to timestamp
select * from values (timestamp('1991-12-06 00:00:00.0'),
array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0')))
as data(a, b);
-
--- cross-join inline tables
-EXPLAIN EXTENDED SELECT * FROM VALUES ('one', 1), ('three', null) CROSS JOIN
VALUES ('one', 1), ('three', null);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
index 37f9cd44da7f2..ba14789d48db6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
@@ -29,27 +29,6 @@ select 2 * 5;
select 5 % 3;
select pmod(-7, 3);
--- check operator precedence.
--- We follow Oracle operator precedence in the table below that lists the
levels of precedence
--- among SQL operators from high to low:
-------------------------------------------------------------------------------------------
--- Operator Operation
-------------------------------------------------------------------------------------------
--- +, - identity, negation
--- *, / multiplication, division
--- +, -, || addition, subtraction,
concatenation
--- =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison
--- NOT exponentiation, logical
negation
--- AND conjunction
--- OR disjunction
-------------------------------------------------------------------------------------------
-explain select 'a' || 1 + 2;
-explain select 1 - 2 || 'b';
-explain select 2 * 4 + 3 || 'b';
-explain select 3 + 1 || 'a' || 4 / 2;
-explain select 1 == 1 OR 'a' || 'b' == 'ab';
-explain select 'a' || 'c' == 'ac' AND 2 == 3;
-
-- math functions
select cot(1);
select cot(null);
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql
index f1461032065ad..1ae49c8bfc76a 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql
@@ -12,11 +12,6 @@ SELECT nullif(1, 2.1d), nullif(1, 1.0d);
SELECT nvl(1, 2.1d), nvl(null, 2.1d);
SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d);
--- explain for these functions; use range to avoid constant folding
-explain extended
-select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y')
-from range(2);
-
-- SPARK-16730 cast alias functions for Hive compatibility
SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1);
SELECT float(1), double(1), decimal(1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index 2effb43183d75..fbc231627e36f 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -5,10 +5,6 @@ select format_string();
-- A pipe operator for string concatenation
select 'a' || 'b' || 'c';
--- Check if catalyst combine nested `Concat`s
-EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
-FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10));
-
-- replace function
select replace('abc', 'b', '123');
select replace('abc', 'b');
@@ -25,29 +21,6 @@ select left(null, -2), left("abcd", -2), left("abcd", 0),
left("abcd", 'a');
select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd",
null);
select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd",
'a');
--- turn off concatBinaryAsString
-set spark.sql.function.concatBinaryAsString=false;
-
--- Check if catalyst combine nested `Concat`s if concatBinaryAsString=false
-EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col
-FROM (
- SELECT
- string(id) col1,
- string(id + 1) col2,
- encode(string(id + 2), 'utf-8') col3,
- encode(string(id + 3), 'utf-8') col4
- FROM range(10)
-);
-
-EXPLAIN SELECT (col1 || (col3 || col4)) col
-FROM (
- SELECT
- string(id) col1,
- encode(string(id + 2), 'utf-8') col3,
- encode(string(id + 3), 'utf-8') col4
- FROM range(10)
-);
-
-- split function
SELECT split('aa1cc2ee3', '[1-9]+');
SELECT split('aa1cc2ee3', '[1-9]+', 2);
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql
index 72cd8ca9d8722..6f14c8ca87821 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql
@@ -21,9 +21,3 @@ select * from range(1, null);
-- range call with a mixed-case function name
select * from RaNgE(2);
-
--- Explain
-EXPLAIN select * from RaNgE(2);
-
--- cross-join table valued functions
-EXPLAIN EXTENDED SELECT * FROM range(3) CROSS JOIN range(3);
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 9a8d025331b67..daf47c4d0a39a 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 47
+-- Number of queries: 46
-- !query 0
@@ -459,31 +459,3 @@ struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER
BY v ASC NULLS FIRST RA
5 NULL NULL
5 false false
5 true true
-
-
--- !query 46
-EXPLAIN EXTENDED SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k
--- !query 46 schema
-struct<plan:string>
--- !query 46 output
-== Parsed Logical Plan ==
-'Aggregate ['k], ['k, unresolvedalias('every('v), None),
unresolvedalias('some('v), None), unresolvedalias('any('v), None)]
-+- 'UnresolvedRelation `test_agg`
-
-== Analyzed Logical Plan ==
-k: int, every(v): boolean, some(v): boolean, any(v): boolean
-Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x,
any(v#x) AS any(v)#x]
-+- SubqueryAlias `test_agg`
- +- Project [k#x, v#x]
- +- SubqueryAlias `test_agg`
- +- LocalRelation [k#x, v#x]
-
-== Optimized Logical Plan ==
-Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x, max(v#x)
AS any(v)#x]
-+- LocalRelation [k#x, v#x]
-
-== Physical Plan ==
-*HashAggregate(keys=[k#x], functions=[min(v#x), max(v#x)], output=[k#x,
every(v)#x, some(v)#x, any(v)#x])
-+- Exchange hashpartitioning(k#x, 200)
- +- *HashAggregate(keys=[k#x], functions=[partial_min(v#x),
partial_max(v#x)], output=[k#x, min#x, max#x])
- +- LocalTableScan [k#x, v#x]
diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
index c065ce5012929..4e80f0bda5513 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 18
+-- Number of queries: 17
-- !query 0
@@ -151,33 +151,3 @@ select * from values (timestamp('1991-12-06 00:00:00.0'),
array(timestamp('1991-
struct<a:timestamp,b:array<timestamp>>
-- !query 16 output
1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0]
-
-
--- !query 17
-EXPLAIN EXTENDED SELECT * FROM VALUES ('one', 1), ('three', null) CROSS JOIN
VALUES ('one', 1), ('three', null)
--- !query 17 schema
-struct<plan:string>
--- !query 17 output
-== Parsed Logical Plan ==
-'Project [*]
-+- 'Join Cross
- :- 'UnresolvedInlineTable [col1, col2], [List(one, 1), List(three, null)]
- +- 'UnresolvedInlineTable [col1, col2], [List(one, 1), List(three, null)]
-
-== Analyzed Logical Plan ==
-col1: string, col2: int, col1: string, col2: int
-Project [col1#x, col2#x, col1#x, col2#x]
-+- Join Cross
- :- LocalRelation [col1#x, col2#x]
- +- LocalRelation [col1#x, col2#x]
-
-== Optimized Logical Plan ==
-Join Cross
-:- LocalRelation [col1#x, col2#x]
-+- LocalRelation [col1#x, col2#x]
-
-== Physical Plan ==
-BroadcastNestedLoopJoin BuildRight, Cross
-:- LocalTableScan [col1#x, col2#x]
-+- BroadcastExchange IdentityBroadcastMode
- +- LocalTableScan [col1#x, col2#x]
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index 570b281353f3d..e0cbd575bc346 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 55
+-- Number of queries: 49
-- !query 0
@@ -195,260 +195,200 @@ struct<pmod(-7, 3):int>
-- !query 24
-explain select 'a' || 1 + 2
+select cot(1)
-- !query 24 schema
-struct<plan:string>
+struct<COT(CAST(1 AS DOUBLE)):double>
-- !query 24 output
-== Physical Plan ==
-*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS
DOUBLE))#x]
-+- *Scan OneRowRelation[]
+0.6420926159343306
-- !query 25
-explain select 1 - 2 || 'b'
+select cot(null)
-- !query 25 schema
-struct<plan:string>
+struct<COT(CAST(NULL AS DOUBLE)):double>
-- !query 25 output
-== Physical Plan ==
-*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x]
-+- *Scan OneRowRelation[]
+NULL
-- !query 26
-explain select 2 * 4 + 3 || 'b'
+select cot(0)
-- !query 26 schema
-struct<plan:string>
+struct<COT(CAST(0 AS DOUBLE)):double>
-- !query 26 output
-== Physical Plan ==
-*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x]
-+- *Scan OneRowRelation[]
+Infinity
-- !query 27
-explain select 3 + 1 || 'a' || 4 / 2
+select cot(-1)
-- !query 27 schema
-struct<plan:string>
+struct<COT(CAST(-1 AS DOUBLE)):double>
-- !query 27 output
-== Physical Plan ==
-*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS
DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x]
-+- *Scan OneRowRelation[]
+-0.6420926159343306
-- !query 28
-explain select 1 == 1 OR 'a' || 'b' == 'ab'
+select ceiling(0)
-- !query 28 schema
-struct<plan:string>
+struct<CEIL(CAST(0 AS DOUBLE)):bigint>
-- !query 28 output
-== Physical Plan ==
-*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]
-+- *Scan OneRowRelation[]
+0
-- !query 29
-explain select 'a' || 'c' == 'ac' AND 2 == 3
+select ceiling(1)
-- !query 29 schema
-struct<plan:string>
+struct<CEIL(CAST(1 AS DOUBLE)):bigint>
-- !query 29 output
-== Physical Plan ==
-*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]
-+- *Scan OneRowRelation[]
+1
-- !query 30
-select cot(1)
+select ceil(1234567890123456)
-- !query 30 schema
-struct<COT(CAST(1 AS DOUBLE)):double>
+struct<CEIL(1234567890123456):bigint>
-- !query 30 output
-0.6420926159343306
+1234567890123456
-- !query 31
-select cot(null)
+select ceiling(1234567890123456)
-- !query 31 schema
-struct<COT(CAST(NULL AS DOUBLE)):double>
+struct<CEIL(1234567890123456):bigint>
-- !query 31 output
-NULL
+1234567890123456
-- !query 32
-select cot(0)
+select ceil(0.01)
-- !query 32 schema
-struct<COT(CAST(0 AS DOUBLE)):double>
+struct<CEIL(0.01):decimal(1,0)>
-- !query 32 output
-Infinity
+1
-- !query 33
-select cot(-1)
+select ceiling(-0.10)
-- !query 33 schema
-struct<COT(CAST(-1 AS DOUBLE)):double>
+struct<CEIL(-0.10):decimal(1,0)>
-- !query 33 output
--0.6420926159343306
+0
-- !query 34
-select ceiling(0)
+select floor(0)
-- !query 34 schema
-struct<CEIL(CAST(0 AS DOUBLE)):bigint>
+struct<FLOOR(CAST(0 AS DOUBLE)):bigint>
-- !query 34 output
0
-- !query 35
-select ceiling(1)
+select floor(1)
-- !query 35 schema
-struct<CEIL(CAST(1 AS DOUBLE)):bigint>
+struct<FLOOR(CAST(1 AS DOUBLE)):bigint>
-- !query 35 output
1
-- !query 36
-select ceil(1234567890123456)
+select floor(1234567890123456)
-- !query 36 schema
-struct<CEIL(1234567890123456):bigint>
+struct<FLOOR(1234567890123456):bigint>
-- !query 36 output
1234567890123456
-- !query 37
-select ceiling(1234567890123456)
--- !query 37 schema
-struct<CEIL(1234567890123456):bigint>
--- !query 37 output
-1234567890123456
-
-
--- !query 38
-select ceil(0.01)
--- !query 38 schema
-struct<CEIL(0.01):decimal(1,0)>
--- !query 38 output
-1
-
-
--- !query 39
-select ceiling(-0.10)
--- !query 39 schema
-struct<CEIL(-0.10):decimal(1,0)>
--- !query 39 output
-0
-
-
--- !query 40
-select floor(0)
--- !query 40 schema
-struct<FLOOR(CAST(0 AS DOUBLE)):bigint>
--- !query 40 output
-0
-
-
--- !query 41
-select floor(1)
--- !query 41 schema
-struct<FLOOR(CAST(1 AS DOUBLE)):bigint>
--- !query 41 output
-1
-
-
--- !query 42
-select floor(1234567890123456)
--- !query 42 schema
-struct<FLOOR(1234567890123456):bigint>
--- !query 42 output
-1234567890123456
-
-
--- !query 43
select floor(0.01)
--- !query 43 schema
+-- !query 37 schema
struct<FLOOR(0.01):decimal(1,0)>
--- !query 43 output
+-- !query 37 output
0
--- !query 44
+-- !query 38
select floor(-0.10)
--- !query 44 schema
+-- !query 38 schema
struct<FLOOR(-0.10):decimal(1,0)>
--- !query 44 output
+-- !query 38 output
-1
--- !query 45
+-- !query 39
select 1 > 0.00001
--- !query 45 schema
+-- !query 39 schema
struct<(CAST(1 AS BIGINT) > 0):boolean>
--- !query 45 output
+-- !query 39 output
true
--- !query 46
+-- !query 40
select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null,
null)
--- !query 46 schema
+-- !query 40 schema
struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS
INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS
DOUBLE)):double>
--- !query 46 output
+-- !query 40 output
1 NULL 0 NULL NULL NULL
--- !query 47
+-- !query 41
select BIT_LENGTH('abc')
--- !query 47 schema
+-- !query 41 schema
struct<bit_length(abc):int>
--- !query 47 output
+-- !query 41 output
24
--- !query 48
+-- !query 42
select CHAR_LENGTH('abc')
--- !query 48 schema
+-- !query 42 schema
struct<length(abc):int>
--- !query 48 output
+-- !query 42 output
3
--- !query 49
+-- !query 43
select CHARACTER_LENGTH('abc')
--- !query 49 schema
+-- !query 43 schema
struct<length(abc):int>
--- !query 49 output
+-- !query 43 output
3
--- !query 50
+-- !query 44
select OCTET_LENGTH('abc')
--- !query 50 schema
+-- !query 44 schema
struct<octet_length(abc):int>
--- !query 50 output
+-- !query 44 output
3
--- !query 51
+-- !query 45
select abs(-3.13), abs('-2.19')
--- !query 51 schema
+-- !query 45 schema
struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double>
--- !query 51 output
+-- !query 45 output
3.13 2.19
--- !query 52
+-- !query 46
select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11)
--- !query 52 schema
+-- !query 46 schema
struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11
AS DOUBLE)):double,(- -1.11):decimal(3,2)>
--- !query 52 output
+-- !query 46 output
-1.11 -1.11 1.11 1.11
--- !query 53
+-- !query 47
select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2),
pmod(null, null)
--- !query 53 schema
+-- !query 47 schema
struct<pmod(-7, 2):int,pmod(0, 2):int,pmod(7, 0):int,pmod(7, CAST(NULL AS
INT)):int,pmod(CAST(NULL AS INT), 2):int,pmod(CAST(NULL AS DOUBLE), CAST(NULL
AS DOUBLE)):double>
--- !query 53 output
+-- !query 47 output
1 0 NULL NULL NULL NULL
--- !query 54
+-- !query 48
select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as
smallint), cast(0 as smallint))
--- !query 54 schema
+-- !query 48 schema
struct<pmod(CAST(3.13 AS DECIMAL(10,0)), CAST(0 AS
DECIMAL(10,0))):decimal(10,0),pmod(CAST(2 AS SMALLINT), CAST(0 AS
SMALLINT)):smallint>
--- !query 54 output
+-- !query 48 output
NULL NULL
diff --git
a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
index e035505f15d28..69a8e958000db 100644
---
a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 15
+-- Number of queries: 14
-- !query 0
@@ -67,74 +67,49 @@ struct<nvl2(NULL, 1, 2.1D):double,nvl2('n', 1, 2.1D):double>
-- !query 8
-explain extended
-select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y')
-from range(2)
--- !query 8 schema
-struct<plan:string>
--- !query 8 output
-== Parsed Logical Plan ==
-'Project [unresolvedalias('ifnull('id, x), None), unresolvedalias('nullif('id,
x), None), unresolvedalias('nvl('id, x), None), unresolvedalias('nvl2('id, x,
y), None)]
-+- 'UnresolvedTableValuedFunction range, [2]
-
-== Analyzed Logical Plan ==
-ifnull(`id`, 'x'): string, nullif(`id`, 'x'): bigint, nvl(`id`, 'x'): string,
nvl2(`id`, 'x', 'y'): string
-Project [ifnull(id#xL, x) AS ifnull(`id`, 'x')#x, nullif(id#xL, x) AS
nullif(`id`, 'x')#xL, nvl(id#xL, x) AS nvl(`id`, 'x')#x, nvl2(id#xL, x, y) AS
nvl2(`id`, 'x', 'y')#x]
-+- Range (0, 2, step=1, splits=None)
-
-== Optimized Logical Plan ==
-Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, id#xL AS
nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS nvl(`id`, 'x')#x, x
AS nvl2(`id`, 'x', 'y')#x]
-+- Range (0, 2, step=1, splits=None)
-
-== Physical Plan ==
-*Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, id#xL AS
nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS nvl(`id`, 'x')#x, x
AS nvl2(`id`, 'x', 'y')#x]
-+- *Range (0, 2, step=1, splits=2)
-
-
--- !query 9
SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)
--- !query 9 schema
+-- !query 8 schema
struct<CAST(1 AS BOOLEAN):boolean,CAST(1 AS TINYINT):tinyint,CAST(1 AS
SMALLINT):smallint,CAST(1 AS INT):int,CAST(1 AS BIGINT):bigint>
--- !query 9 output
+-- !query 8 output
true 1 1 1 1
--- !query 10
+-- !query 9
SELECT float(1), double(1), decimal(1)
--- !query 10 schema
+-- !query 9 schema
struct<CAST(1 AS FLOAT):float,CAST(1 AS DOUBLE):double,CAST(1 AS
DECIMAL(10,0)):decimal(10,0)>
--- !query 10 output
+-- !query 9 output
1.0 1.0 1
--- !query 11
+-- !query 10
SELECT date("2014-04-04"), timestamp(date("2014-04-04"))
--- !query 11 schema
+-- !query 10 schema
struct<CAST(2014-04-04 AS DATE):date,CAST(CAST(2014-04-04 AS DATE) AS
TIMESTAMP):timestamp>
--- !query 11 output
+-- !query 10 output
2014-04-04 2014-04-04 00:00:00
--- !query 12
+-- !query 11
SELECT string(1, 2)
--- !query 12 schema
+-- !query 11 schema
struct<>
--- !query 12 output
+-- !query 11 output
org.apache.spark.sql.AnalysisException
Function string accepts only one argument; line 1 pos 7
--- !query 13
+-- !query 12
CREATE TEMPORARY VIEW tempView1 AS VALUES (1, NAMED_STRUCT('col1', 'gamma',
'col2', 'delta')) AS T(id, st)
--- !query 13 schema
+-- !query 12 schema
struct<>
--- !query 13 output
+-- !query 12 output
--- !query 14
+-- !query 13
SELECT nvl(st.col1, "value"), count(*) FROM from tempView1 GROUP BY
nvl(st.col1, "value")
--- !query 14 schema
+-- !query 13 schema
struct<nvl(tempview1.`st`.`col1` AS `col1`, 'value'):string,FROM:bigint>
--- !query 14 output
+-- !query 13 output
gamma 1
diff --git
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index e8f2e0a81455a..25d93b2063146 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 17
+-- Number of queries: 13
-- !query 0
@@ -29,151 +29,80 @@ abc
-- !query 3
-EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
-FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10))
--- !query 3 schema
-struct<plan:string>
--- !query 3 output
-== Parsed Logical Plan ==
-'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x]
-+- 'SubqueryAlias `__auto_generated_subquery_name`
- +- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x]
- +- 'UnresolvedTableValuedFunction range, [10]
-
-== Analyzed Logical Plan ==
-col: string
-Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as
string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x]
-+- SubqueryAlias `__auto_generated_subquery_name`
- +- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS
col4#xL]
- +- Range (0, 10, step=1, splits=None)
-
-== Optimized Logical Plan ==
-Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as
string), cast(id#xL as string)) AS col#x]
-+- Range (0, 10, step=1, splits=None)
-
-== Physical Plan ==
-*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as
string), cast(id#xL as string)) AS col#x]
-+- *Range (0, 10, step=1, splits=2)
-
-
--- !query 4
select replace('abc', 'b', '123')
--- !query 4 schema
+-- !query 3 schema
struct<replace(abc, b, 123):string>
--- !query 4 output
+-- !query 3 output
a123c
--- !query 5
+-- !query 4
select replace('abc', 'b')
--- !query 5 schema
+-- !query 4 schema
struct<replace(abc, b, ):string>
--- !query 5 output
+-- !query 4 output
ac
--- !query 6
+-- !query 5
select length(uuid()), (uuid() <> uuid())
--- !query 6 schema
+-- !query 5 schema
struct<length(uuid()):int,(NOT (uuid() = uuid())):boolean>
--- !query 6 output
+-- !query 5 output
36 true
--- !query 7
+-- !query 6
select position('bar' in 'foobarbar'), position(null, 'foobarbar'),
position('aaads', null)
--- !query 7 schema
+-- !query 6 schema
struct<locate(bar, foobarbar, 1):int,locate(CAST(NULL AS STRING), foobarbar,
1):int,locate(aaads, CAST(NULL AS STRING), 1):int>
--- !query 7 output
+-- !query 6 output
4 NULL NULL
--- !query 8
+-- !query 7
select left("abcd", 2), left("abcd", 5), left("abcd", '2'), left("abcd", null)
--- !query 8 schema
+-- !query 7 schema
struct<left('abcd', 2):string,left('abcd', 5):string,left('abcd',
'2'):string,left('abcd', NULL):string>
--- !query 8 output
+-- !query 7 output
ab abcd ab NULL
--- !query 9
+-- !query 8
select left(null, -2), left("abcd", -2), left("abcd", 0), left("abcd", 'a')
--- !query 9 schema
+-- !query 8 schema
struct<left(NULL, -2):string,left('abcd', -2):string,left('abcd',
0):string,left('abcd', 'a'):string>
--- !query 9 output
+-- !query 8 output
NULL NULL
--- !query 10
+-- !query 9
select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd",
null)
--- !query 10 schema
+-- !query 9 schema
struct<right('abcd', 2):string,right('abcd', 5):string,right('abcd',
'2'):string,right('abcd', NULL):string>
--- !query 10 output
+-- !query 9 output
cd abcd cd NULL
--- !query 11
+-- !query 10
select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a')
--- !query 11 schema
+-- !query 10 schema
struct<right(NULL, -2):string,right('abcd', -2):string,right('abcd',
0):string,right('abcd', 'a'):string>
--- !query 11 output
+-- !query 10 output
NULL NULL
--- !query 12
-set spark.sql.function.concatBinaryAsString=false
--- !query 12 schema
-struct<key:string,value:string>
--- !query 12 output
-spark.sql.function.concatBinaryAsString false
-
-
--- !query 13
-EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col
-FROM (
- SELECT
- string(id) col1,
- string(id + 1) col2,
- encode(string(id + 2), 'utf-8') col3,
- encode(string(id + 3), 'utf-8') col4
- FROM range(10)
-)
--- !query 13 schema
-struct<plan:string>
--- !query 13 output
-== Physical Plan ==
-*Project [concat(cast(id#xL as string), cast((id#xL + 1) as string),
cast(encode(cast((id#xL + 2) as string), utf-8) as string),
cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x]
-+- *Range (0, 10, step=1, splits=2)
-
-
--- !query 14
-EXPLAIN SELECT (col1 || (col3 || col4)) col
-FROM (
- SELECT
- string(id) col1,
- encode(string(id + 2), 'utf-8') col3,
- encode(string(id + 3), 'utf-8') col4
- FROM range(10)
-)
--- !query 14 schema
-struct<plan:string>
--- !query 14 output
-== Physical Plan ==
-*Project [concat(cast(id#xL as string), cast(encode(cast((id#xL + 2) as
string), utf-8) as string), cast(encode(cast((id#xL + 3) as string), utf-8) as
string)) AS col#x]
-+- *Range (0, 10, step=1, splits=2)
-
-
--- !query 15
+-- !query 11
SELECT split('aa1cc2ee3', '[1-9]+')
--- !query 15 schema
+-- !query 11 schema
struct<split(aa1cc2ee3, [1-9]+, -1):array<string>>
--- !query 15 output
+-- !query 11 output
["aa","cc","ee",""]
--- !query 16
+-- !query 12
SELECT split('aa1cc2ee3', '[1-9]+', 2)
--- !query 16 schema
+-- !query 12 schema
struct<split(aa1cc2ee3, [1-9]+, 2):array<string>>
--- !query 16 output
+-- !query 12 output
["aa","cc2ee3"]
diff --git
a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
index 94af9181225d6..fdbea0ee90720 100644
---
a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 10
+-- Number of queries: 8
-- !query 0
@@ -99,42 +99,3 @@ struct<id:bigint>
-- !query 7 output
0
1
-
-
--- !query 8
-EXPLAIN select * from RaNgE(2)
--- !query 8 schema
-struct<plan:string>
--- !query 8 output
-== Physical Plan ==
-*Range (0, 2, step=1, splits=2)
-
-
--- !query 9
-EXPLAIN EXTENDED SELECT * FROM range(3) CROSS JOIN range(3)
--- !query 9 schema
-struct<plan:string>
--- !query 9 output
-== Parsed Logical Plan ==
-'Project [*]
-+- 'Join Cross
- :- 'UnresolvedTableValuedFunction range, [3]
- +- 'UnresolvedTableValuedFunction range, [3]
-
-== Analyzed Logical Plan ==
-id: bigint, id: bigint
-Project [id#xL, id#xL]
-+- Join Cross
- :- Range (0, 3, step=1, splits=None)
- +- Range (0, 3, step=1, splits=None)
-
-== Optimized Logical Plan ==
-Join Cross
-:- Range (0, 3, step=1, splits=None)
-+- Range (0, 3, step=1, splits=None)
-
-== Physical Plan ==
-BroadcastNestedLoopJoin BuildRight, Cross
-:- *Range (0, 3, step=1, splits=2)
-+- BroadcastExchange IdentityBroadcastMode
- +- *Range (0, 3, step=1, splits=2)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 56d300e30a58e..ce475922eb5e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.StructType
@@ -29,10 +30,11 @@ class ExplainSuite extends QueryTest with SharedSQLContext {
private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*):
Unit = {
val output = new java.io.ByteArrayOutputStream()
Console.withOut(output) {
- df.explain(extended = false)
+ df.explain(extended = true)
}
+ val normalizedOutput = output.toString.replaceAll("#\\d+", "#x")
for (key <- keywords) {
- assert(output.toString.contains(key))
+ assert(normalizedOutput.contains(key))
}
}
@@ -53,6 +55,133 @@ class ExplainSuite extends QueryTest with SharedSQLContext {
checkKeywordsExistsInExplain(df,
keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized,
1 replicas)")
}
+
+ test("optimized plan should show the rewritten aggregate expression") {
+ withTempView("test_agg") {
+ sql(
+ """
+ |CREATE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ | (1, true), (1, false),
+ | (2, true),
+ | (3, false), (3, null),
+ | (4, null), (4, null),
+ | (5, null), (5, true), (5, false) AS test_agg(k, v)
+ """.stripMargin)
+
+ // simple explain of queries having every/some/any aggregates. Optimized
+ // plan should show the rewritten aggregate expression.
+ val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY
k")
+ checkKeywordsExistsInExplain(df,
+ "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x,
" +
+ "max(v#x) AS any(v)#x]")
+ }
+ }
+
+ test("explain inline tables cross-joins") {
+ val df = sql(
+ """
+ |SELECT * FROM VALUES ('one', 1), ('three', null)
+ | CROSS JOIN VALUES ('one', 1), ('three', null)
+ """.stripMargin)
+ checkKeywordsExistsInExplain(df,
+ "Join Cross",
+ ":- LocalRelation [col1#x, col2#x]",
+ "+- LocalRelation [col1#x, col2#x]")
+ }
+
+ test("explain table valued functions") {
+ checkKeywordsExistsInExplain(sql("select * from RaNgE(2)"), "Range (0, 2,
step=1, splits=None)")
+ checkKeywordsExistsInExplain(sql("SELECT * FROM range(3) CROSS JOIN
range(3)"),
+ "Join Cross",
+ ":- Range (0, 3, step=1, splits=None)",
+ "+- Range (0, 3, step=1, splits=None)")
+ }
+
+ test("explain string functions") {
+ // Check if catalyst combine nested `Concat`s
+ val df1 = sql(
+ """
+ |SELECT (col1 || col2 || col3 || col4) col
+ | FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10))
+ """.stripMargin)
+ checkKeywordsExistsInExplain(df1,
+ "Project [concat(cast(id#xL as string), cast(id#xL as string),
cast(id#xL as string)" +
+ ", cast(id#xL as string)) AS col#x]")
+
+ // Check if catalyst combine nested `Concat`s if concatBinaryAsString=false
+ withSQLConf(SQLConf.CONCAT_BINARY_AS_STRING.key -> "false") {
+ val df2 = sql(
+ """
+ |SELECT ((col1 || col2) || (col3 || col4)) col
+ |FROM (
+ | SELECT
+ | string(id) col1,
+ | string(id + 1) col2,
+ | encode(string(id + 2), 'utf-8') col3,
+ | encode(string(id + 3), 'utf-8') col4
+ | FROM range(10)
+ |)
+ """.stripMargin)
+ checkKeywordsExistsInExplain(df2,
+ "Project [concat(cast(id#xL as string), cast((id#xL + 1) as string), "
+
+ "cast(encode(cast((id#xL + 2) as string), utf-8) as string), " +
+ "cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS
col#x]")
+
+ val df3 = sql(
+ """
+ |SELECT (col1 || (col3 || col4)) col
+ |FROM (
+ | SELECT
+ | string(id) col1,
+ | encode(string(id + 2), 'utf-8') col3,
+ | encode(string(id + 3), 'utf-8') col4
+ | FROM range(10)
+ |)
+ """.stripMargin)
+ checkKeywordsExistsInExplain(df3,
+ "Project [concat(cast(id#xL as string), " +
+ "cast(encode(cast((id#xL + 2) as string), utf-8) as string), " +
+ "cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS
col#x]")
+ }
+ }
+
+ test("check operator precedence") {
+ // We follow Oracle operator precedence in the table below that lists the
levels
+ // of precedence among SQL operators from high to low:
+ //
---------------------------------------------------------------------------------------
+ // Operator Operation
+ //
---------------------------------------------------------------------------------------
+ // +, - identity, negation
+ // *, / multiplication,
division
+ // +, -, || addition,
subtraction, concatenation
+ // =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison
+ // NOT exponentiation,
logical negation
+ // AND conjunction
+ // OR disjunction
+ //
---------------------------------------------------------------------------------------
+ checkKeywordsExistsInExplain(sql("select 'a' || 1 + 2"),
+ "Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2
AS DOUBLE))#x]")
+ checkKeywordsExistsInExplain(sql("select 1 - 2 || 'b'"),
+ "Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x]")
+ checkKeywordsExistsInExplain(sql("select 2 * 4 + 3 || 'b'"),
+ "Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x]")
+ checkKeywordsExistsInExplain(sql("select 3 + 1 || 'a' || 4 / 2"),
+ "Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), " +
+ "CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x]")
+ checkKeywordsExistsInExplain(sql("select 1 == 1 OR 'a' || 'b' == 'ab'"),
+ "Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]")
+ checkKeywordsExistsInExplain(sql("select 'a' || 'c' == 'ac' AND 2 == 3"),
+ "Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]")
+ }
+
+ test("explain for these functions; use range to avoid constant folding") {
+ val df = sql("select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'),
nvl2(id, 'x', 'y') " +
+ "from range(2)")
+ checkKeywordsExistsInExplain(df,
+ "Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, " +
+ "id#xL AS nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS
nvl(`id`, 'x')#x, " +
+ "x AS nvl2(`id`, 'x', 'y')#x]")
+ }
}
case class ExplainSingleData(id: Int)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index cf4585bf7ac6c..b2515226d9a14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -137,28 +137,39 @@ class SQLQueryTestSuite extends QueryTest with
SharedSQLContext {
}
}
+ // For better test coverage, runs the tests on mixed config sets:
WHOLESTAGE_CODEGEN_ENABLED
+ // and CODEGEN_FACTORY_MODE.
+ private lazy val codegenConfigSets = Array(
+ ("true", "CODEGEN_ONLY"),
+ ("false", "CODEGEN_ONLY"),
+ ("false", "NO_CODEGEN")
+ ).map { case (wholeStageCodegenEnabled, codegenFactoryMode) =>
+ Array(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStageCodegenEnabled,
+ SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode)
+ }
+
/** Run a test case. */
private def runTest(testCase: TestCase): Unit = {
val input = fileToString(new File(testCase.inputFile))
val (comments, code) = input.split("\n").partition(_.startsWith("--"))
- // Runs all the tests on both codegen-only and interpreter modes
- val codegenConfigSets = Array(CODEGEN_ONLY, NO_CODEGEN).map {
- case codegenFactoryMode =>
- Array(SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode.toString)
- }
- val configSets = {
- val configLines =
comments.filter(_.startsWith("--SET")).map(_.substring(5))
- val configs = configLines.map(_.split(",").map { confAndValue =>
- val (conf, value) = confAndValue.span(_ != '=')
- conf.trim -> value.substring(1).trim
- })
- // When we are regenerating the golden files, we don't need to set any
config as they
- // all need to return the same result
- if (regenerateGoldenFiles) {
- Array.empty[Array[(String, String)]]
- } else {
+ // List of SQL queries to run
+ // note: this is not a robust way to split queries using semicolon, but
works for now.
+ val queries =
code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
+
+ // When we are regenerating the golden files, we don't need to set any
config as they
+ // all need to return the same result
+ if (regenerateGoldenFiles) {
+ runQueries(queries, testCase.resultFile, None)
+ } else {
+ val configSets = {
+ val configLines =
comments.filter(_.startsWith("--SET")).map(_.substring(5))
+ val configs = configLines.map(_.split(",").map { confAndValue =>
+ val (conf, value) = confAndValue.span(_ != '=')
+ conf.trim -> value.substring(1).trim
+ })
+
if (configs.nonEmpty) {
codegenConfigSets.flatMap { codegenConfig =>
configs.map { config =>
@@ -169,15 +180,7 @@ class SQLQueryTestSuite extends QueryTest with
SharedSQLContext {
codegenConfigSets
}
}
- }
- // List of SQL queries to run
- // note: this is not a robust way to split queries using semicolon, but
works for now.
- val queries =
code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
-
- if (configSets.isEmpty) {
- runQueries(queries, testCase.resultFile, None)
- } else {
configSets.foreach { configSet =>
try {
runQueries(queries, testCase.resultFile, Some(configSet))
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]