AngersZhuuuu commented on a change in pull request #29414:
URL: https://github.com/apache/spark/pull/29414#discussion_r475341428
##########
File path: sql/core/src/test/resources/sql-tests/results/transform.sql.out
##########
@@ -0,0 +1,224 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 15
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW t AS SELECT * FROM VALUES
+('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100),
bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'),
date('2000-04-01')),
+('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200),
bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'),
date('2000-04-02')),
+('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300),
bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'),
date('2000-04-03'))
+AS t(a, b, c, d, e, f, g, h, i, j, k, l)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'cat' AS (a)
+FROM t
+-- !query schema
+struct<a:string>
+-- !query output
+1
+2
+3
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'some_non_existent_command' AS (a)
+FROM t
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+Subprocess exited with status 127. Error: /bin/bash:
some_non_existent_command: command not found
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'python some_non_existent_file' AS (a)
+FROM t
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+Subprocess exited with status 2. Error: python: can't open file
'some_non_existent_file': [Errno 2] No such file or directory
+
+
+-- !query
+SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
+ SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
+ USING 'cat' AS (
+ a string,
+ b boolean,
+ c binary,
+ d tinyint,
+ e int,
+ f smallint,
+ g long,
+ h float,
+ i double,
+ j decimal(38, 18),
+ k timestamp,
+ l date)
+ FROM t
+) tmp
+-- !query schema
+struct<a:string,b:boolean,decode(c,
UTF-8):string,d:tinyint,e:int,f:smallint,g:bigint,h:float,i:double,j:decimal(38,18),k:timestamp,l:date>
+-- !query output
+1 true Spark SQL 1 1 100 1 1.0 1.0
1.000000000000000000 1997-01-02 00:00:00 2000-04-01
+2 false Spark SQL 2 2 200 2 2.0 2.0
2.000000000000000000 1997-01-02 03:04:05 2000-04-02
+3 true Spark SQL 3 3 300 3 3.0 3.0
3.000000000000000000 1997-02-10 17:32:01 2000-04-03
+
+
+-- !query
+SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
+ SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
+ USING 'cat' AS (
+ a string,
+ b string,
+ c string,
+ d string,
+ e string,
+ f string,
+ g string,
+ h string,
+ i string,
+ j string,
+ k string,
+ l string)
+ FROM t
+) tmp
+-- !query schema
+struct<a:string,b:string,decode(CAST(c AS BINARY),
UTF-8):string,d:string,e:string,f:string,g:string,h:string,i:string,j:string,k:string,l:string>
+-- !query output
+1 true Spark SQL 1 1 100 1 1.0 1.0
1 1997-01-02 00:00:00 2000-04-01
+2 false Spark SQL 2 2 200 2 2.0 2.0
2 1997-01-02 03:04:05 2000-04-02
+3 true Spark SQL 3 3 300 3 3.0 3.0
3 1997-02-10 17:32:01 2000-04-03
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'cat'
+FROM t
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArrayIndexOutOfBoundsException
+1
+
+
+-- !query
+SELECT TRANSFORM(a, b)
+USING 'cat'
+FROM t
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+1 true
+2 false
+3 true
+
+
+-- !query
+SELECT TRANSFORM(a, b, c)
+USING 'cat'
+FROM t
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+1 true
+2 false
+3 true
+
+
+-- !query
+SELECT TRANSFORM(a, b, c, d, e, f, g, h, i)
+USING 'cat' AS (a int, b short, c long, d byte, e float, f double, g
decimal(38, 18), h date, i timestamp)
+FROM VALUES
+('a','','1231a','a','213.21a','213.21a','0a.21d','2000-04-01123','1997-0102
00:00:') tmp(a, b, c, d, e, f, g, h, i)
+-- !query schema
+struct<a:int,b:smallint,c:bigint,d:tinyint,e:float,f:double,g:decimal(38,18),h:date,i:timestamp>
+-- !query output
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+
+
+-- !query
+SELECT TRANSFORM(b, max(a), sum(f))
+USING 'cat' AS (a, b)
+FROM t
+GROUP BY b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+mismatched input 'GROUP' expecting {<EOF>, ';'}(line 4, pos 0)
+
+== SQL ==
+SELECT TRANSFORM(b, max(a), sum(f))
+USING 'cat' AS (a, b)
+FROM t
+GROUP BY b
+^^^
+
+
+-- !query
+MAP a, b USING 'cat' AS (a, b) FROM t
+-- !query schema
+struct<a:string,b:string>
+-- !query output
+1 true
+2 false
+3 true
+
+
+-- !query
+REDUCE a, b USING 'cat' AS (a, b) FROM t
+-- !query schema
+struct<a:string,b:string>
+-- !query output
+1 true
+2 false
+3 true
+
+
+-- !query
+SELECT TRANSFORM(a, b, c, null)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ LINES TERMINATED BY '\n'
+ NULL DEFINED AS 'NULL'
+USING 'cat' AS (a, b, c, d)
Review comment:
> Also, could you add test cases for the parser, too?
https://github.com/apache/spark/pull/29414/files#diff-36e2b29ae675caaa1fce16e74fbd8710R1135
Added some unit tests in transform.sql to cover these parser cases.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]