This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 7f9ca1b37e14 [SPARK-52590][SQL][TESTS] Add SQL query tests for SQL
functions without explicit return types
7f9ca1b37e14 is described below
commit 7f9ca1b37e140303af0d70e2895d19314f812661
Author: Allison Wang <[email protected]>
AuthorDate: Fri Jun 27 22:04:29 2025 +0800
[SPARK-52590][SQL][TESTS] Add SQL query tests for SQL functions without
explicit return types
### What changes were proposed in this pull request?
SQL UDFs support implicit return types. This PR adds more tests for this.
### Why are the changes needed?
To improve test coverage
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Test only
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #51296 from allisonwang-db/spark-52590-opt-return-type-test.
Authored-by: Allison Wang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit af5632f49260fa05221c32e6306d355113fd2648)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql-tests/analyzer-results/sql-udf.sql.out | 220 +++++++++++++++++++
.../test/resources/sql-tests/inputs/sql-udf.sql | 78 +++++++
.../resources/sql-tests/results/sql-udf.sql.out | 240 +++++++++++++++++++++
3 files changed, 538 insertions(+)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
index 7d8111ce47f3..488387fc9019 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
@@ -1659,6 +1659,226 @@ Project [spark_catalog.default.bar1_10(b#x) AS
spark_catalog.default.bar1_10(3)#
+- OneRowRelation
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11a() RETURN 42
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11a, , 42, false, false,
false, true
+
+
+-- !query
+SELECT foo1_11a()
+-- !query analysis
+Project [spark_catalog.default.foo1_11a() AS
spark_catalog.default.foo1_11a()#x]
++- Project
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11b() RETURN 'hello world'
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11b, , 'hello world',
false, false, false, true
+
+
+-- !query
+SELECT foo1_11b()
+-- !query analysis
+Project [spark_catalog.default.foo1_11b() AS
spark_catalog.default.foo1_11b()#x]
++- Project
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11c(a INT, b INT) RETURN a + b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11c, a INT, b INT, , a +
b, false, false, false, true
+
+
+-- !query
+SELECT foo1_11c(3, 5)
+-- !query analysis
+Project [spark_catalog.default.foo1_11c(a#x, b#x) AS
spark_catalog.default.foo1_11c(3, 5)#x]
++- Project [cast(3 as int) AS a#x, cast(5 as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11d(a DOUBLE, b INT) RETURN a * b + 1.5
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11d, a DOUBLE, b INT, , a
* b + 1.5, false, false, false, true
+
+
+-- !query
+SELECT foo1_11d(3.0, 5)
+-- !query analysis
+Project [spark_catalog.default.foo1_11d(a#x, b#x) AS
spark_catalog.default.foo1_11d(3.0, 5)#x]
++- Project [cast(3.0 as double) AS a#x, cast(5 as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11e(a INT) RETURN a > 10
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11e, a INT, , a > 10,
false, false, false, true
+
+
+-- !query
+SELECT foo1_11e(15), foo1_11e(5)
+-- !query analysis
+Project [spark_catalog.default.foo1_11e(a#x) AS
spark_catalog.default.foo1_11e(15)#x, spark_catalog.default.foo1_11e(a#x) AS
spark_catalog.default.foo1_11e(5)#x]
++- Project [cast(15 as int) AS a#x, cast(5 as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11f(d DATE) RETURN d + INTERVAL '1' DAY
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11f, d DATE, , d +
INTERVAL '1' DAY, false, false, false, true
+
+
+-- !query
+SELECT foo1_11f(DATE '2024-01-01')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11g(n INT) RETURN ARRAY(1, 2, n)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11g, n INT, , ARRAY(1, 2,
n), false, false, false, true
+
+
+-- !query
+SELECT foo1_11g(5)
+-- !query analysis
+Project [spark_catalog.default.foo1_11g(n#x) AS
spark_catalog.default.foo1_11g(5)#x]
++- Project [cast(5 as int) AS n#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11h(a INT, b STRING) RETURN STRUCT(a, b)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11h, a INT, b STRING, ,
STRUCT(a, b), false, false, false, true
+
+
+-- !query
+SELECT foo1_11h(1, 'test')
+-- !query analysis
+Project [spark_catalog.default.foo1_11h(a#x, b#x) AS
spark_catalog.default.foo1_11h(1, test)#x]
++- Project [cast(1 as int) AS a#x, cast(test as string) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11i(x INT) RETURN (SELECT x * 2)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11i, x INT, , (SELECT x *
2), false, false, false, true
+
+
+-- !query
+SELECT foo1_11i(5)
+-- !query analysis
+Project [spark_catalog.default.foo1_11i(x#x) AS
spark_catalog.default.foo1_11i(5)#x]
++- Project [cast(5 as int) AS x#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11j(s STRING) RETURN UPPER(s)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11j, s STRING, , UPPER(s),
false, false, false, true
+
+
+-- !query
+SELECT foo1_11j('hello')
+-- !query analysis
+Project [spark_catalog.default.foo1_11j(s#x) AS
spark_catalog.default.foo1_11j(hello)#x]
++- Project [cast(hello as string) AS s#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11k(a INT, b STRING) RETURN CONCAT(CAST(a AS
STRING), '_', b)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11k, a INT, b STRING, ,
CONCAT(CAST(a AS STRING), '_', b), false, false, false, true
+
+
+-- !query
+SELECT foo1_11k(123, 'test')
+-- !query analysis
+Project [spark_catalog.default.foo1_11k(a#x, b#x) AS
spark_catalog.default.foo1_11k(123, test)#x]
++- Project [cast(123 as int) AS a#x, cast(test as string) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11l() RETURNS TABLE RETURN SELECT 1 as id,
'hello' as name
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11l, TABLE, SELECT 1 as
id, 'hello' as name, true, false, false, true
+
+
+-- !query
+SELECT * FROM foo1_11l()
+-- !query analysis
+Project [id#x, name#x]
++- SQLFunctionNode spark_catalog.default.foo1_11l
+ +- SubqueryAlias foo1_11l
+ +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+ +- Project [1 AS id#x, hello AS name#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11m(a INT, b STRING) RETURNS TABLE RETURN
SELECT a * 2 as doubled, UPPER(b) as upper_name
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11m, a INT, b STRING,
TABLE, SELECT a * 2 as doubled, UPPER(b) as upper_name, true, false, false, true
+
+
+-- !query
+SELECT * FROM foo1_11m(5, 'world')
+-- !query analysis
+Project [doubled#x, upper_name#x]
++- SQLFunctionNode spark_catalog.default.foo1_11m
+ +- SubqueryAlias foo1_11m
+ +- Project [cast(doubled#x as int) AS doubled#x, cast(upper_name#x as
string) AS upper_name#x]
+ +- Project [(cast(5 as int) * 2) AS doubled#x, upper(cast(world as
string)) AS upper_name#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11n(arr ARRAY<INT>) RETURNS TABLE RETURN
SELECT size(arr) as array_size, arr[0] as first_element
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11n, arr ARRAY<INT>,
TABLE, SELECT size(arr) as array_size, arr[0] as first_element, true, false,
false, true
+
+
+-- !query
+SELECT * FROM foo1_11n(ARRAY(1, 2, 3))
+-- !query analysis
+Project [array_size#x, first_element#x]
++- SQLFunctionNode spark_catalog.default.foo1_11n
+ +- SubqueryAlias foo1_11n
+ +- Project [cast(array_size#x as int) AS array_size#x,
cast(first_element#x as int) AS first_element#x]
+ +- Project [size(cast(array(1, 2, 3) as array<int>), false) AS
array_size#x, cast(array(1, 2, 3) as array<int>)[0] AS first_element#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11o(id INT, name STRING) RETURNS TABLE RETURN
SELECT STRUCT(id, name) as person_info, id + 100 as modified_id
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1_11o, id INT, name STRING,
TABLE, SELECT STRUCT(id, name) as person_info, id + 100 as modified_id, true,
false, false, true
+
+
+-- !query
+SELECT * FROM foo1_11o(1, 'Alice')
+-- !query analysis
+Project [person_info#x, modified_id#x]
++- SQLFunctionNode spark_catalog.default.foo1_11o
+ +- SubqueryAlias foo1_11o
+ +- Project [cast(person_info#x as struct<id:int,name:string>) AS
person_info#x, cast(modified_id#x as int) AS modified_id#x]
+ +- Project [struct(id, cast(1 as int), name, cast(Alice as string))
AS person_info#x, (cast(1 as int) + 100) AS modified_id#x]
+ +- OneRowRelation
+
+
-- !query
CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
index 849207119a50..1cb749e77099 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
@@ -343,6 +343,84 @@ CREATE OR REPLACE FUNCTION foo1_10(a INT) RETURNS INT
RETURN a + 2;
CREATE OR REPLACE FUNCTION bar1_10(b INT) RETURNS STRING RETURN
foo1_10(TRY_CAST(b AS STRING));
SELECT bar1_10(3);
+-- 1.11 Optional return types (type inference)
+-- 1.11.a Scalar UDF without RETURNS clause - return type inferred from body
+-- Simple literal return
+CREATE OR REPLACE FUNCTION foo1_11a() RETURN 42;
+-- Expect: 42
+SELECT foo1_11a();
+
+-- String literal return
+CREATE OR REPLACE FUNCTION foo1_11b() RETURN 'hello world';
+-- Expect: 'hello world'
+SELECT foo1_11b();
+
+-- Expression return - should infer INT
+CREATE OR REPLACE FUNCTION foo1_11c(a INT, b INT) RETURN a + b;
+-- Expect: 8
+SELECT foo1_11c(3, 5);
+
+-- Expression return - should infer DOUBLE
+CREATE OR REPLACE FUNCTION foo1_11d(a DOUBLE, b INT) RETURN a * b + 1.5;
+-- Expect: 16.5
+SELECT foo1_11d(3.0, 5);
+
+-- Boolean expression return
+CREATE OR REPLACE FUNCTION foo1_11e(a INT) RETURN a > 10;
+-- Expect: true, false
+SELECT foo1_11e(15), foo1_11e(5);
+
+-- Date arithmetic return
+CREATE OR REPLACE FUNCTION foo1_11f(d DATE) RETURN d + INTERVAL '1' DAY;
+-- Expect: 2024-01-02
+SELECT foo1_11f(DATE '2024-01-01');
+
+-- Array return
+CREATE OR REPLACE FUNCTION foo1_11g(n INT) RETURN ARRAY(1, 2, n);
+-- Expect: [1, 2, 5]
+SELECT foo1_11g(5);
+
+-- Struct return
+CREATE OR REPLACE FUNCTION foo1_11h(a INT, b STRING) RETURN STRUCT(a, b);
+-- Expect: {1, 'test'}
+SELECT foo1_11h(1, 'test');
+
+-- Subquery return - scalar
+CREATE OR REPLACE FUNCTION foo1_11i(x INT) RETURN (SELECT x * 2);
+-- Expect: 10
+SELECT foo1_11i(5);
+
+-- Function call return
+CREATE OR REPLACE FUNCTION foo1_11j(s STRING) RETURN UPPER(s);
+-- Expect: 'HELLO'
+SELECT foo1_11j('hello');
+
+-- Complex expression with multiple types
+CREATE OR REPLACE FUNCTION foo1_11k(a INT, b STRING) RETURN CONCAT(CAST(a AS
STRING), '_', b);
+-- Expect: '123_test'
+SELECT foo1_11k(123, 'test');
+
+-- 1.11.b Table UDF without TABLE schema - schema inferred from body
+-- Simple SELECT with literals
+CREATE OR REPLACE FUNCTION foo1_11l() RETURNS TABLE RETURN SELECT 1 as id,
'hello' as name;
+-- Expect: (1, 'hello')
+SELECT * FROM foo1_11l();
+
+-- SELECT with expressions
+CREATE OR REPLACE FUNCTION foo1_11m(a INT, b STRING) RETURNS TABLE RETURN
SELECT a * 2 as doubled, UPPER(b) as upper_name;
+-- Expect: (10, 'WORLD')
+SELECT * FROM foo1_11m(5, 'world');
+
+-- SELECT with complex data types
+CREATE OR REPLACE FUNCTION foo1_11n(arr ARRAY<INT>) RETURNS TABLE RETURN
SELECT size(arr) as array_size, arr[0] as first_element;
+-- Expect: (3, 1)
+SELECT * FROM foo1_11n(ARRAY(1, 2, 3));
+
+-- SELECT with struct columns
+CREATE OR REPLACE FUNCTION foo1_11o(id INT, name STRING) RETURNS TABLE RETURN
SELECT STRUCT(id, name) as person_info, id + 100 as modified_id;
+-- Expect: ({1, 'Alice'}, 101)
+SELECT * FROM foo1_11o(1, 'Alice');
+
-------------------------------
-- 2. Scalar SQL UDF
-- 2.1 deterministic simple expressions
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
index eab2470d3ffb..dd96c3f49604 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
@@ -1694,6 +1694,246 @@ struct<spark_catalog.default.bar1_10(3):string>
5
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11a() RETURN 42
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11a()
+-- !query schema
+struct<spark_catalog.default.foo1_11a():int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11b() RETURN 'hello world'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11b()
+-- !query schema
+struct<spark_catalog.default.foo1_11b():string>
+-- !query output
+hello world
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11c(a INT, b INT) RETURN a + b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11c(3, 5)
+-- !query schema
+struct<spark_catalog.default.foo1_11c(3, 5):int>
+-- !query output
+8
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11d(a DOUBLE, b INT) RETURN a * b + 1.5
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11d(3.0, 5)
+-- !query schema
+struct<spark_catalog.default.foo1_11d(3.0, 5):double>
+-- !query output
+16.5
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11e(a INT) RETURN a > 10
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11e(15), foo1_11e(5)
+-- !query schema
+struct<spark_catalog.default.foo1_11e(15):boolean,spark_catalog.default.foo1_11e(5):boolean>
+-- !query output
+true false
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11f(d DATE) RETURN d + INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11f(DATE '2024-01-01')
+-- !query schema
+struct<spark_catalog.default.foo1_11f(DATE '2024-01-01'):date>
+-- !query output
+2024-01-02
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11g(n INT) RETURN ARRAY(1, 2, n)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11g(5)
+-- !query schema
+struct<spark_catalog.default.foo1_11g(5):array<int>>
+-- !query output
+[1,2,5]
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11h(a INT, b STRING) RETURN STRUCT(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11h(1, 'test')
+-- !query schema
+struct<spark_catalog.default.foo1_11h(1, test):struct<a:int,b:string>>
+-- !query output
+{"a":1,"b":"test"}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11i(x INT) RETURN (SELECT x * 2)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11i(5)
+-- !query schema
+struct<spark_catalog.default.foo1_11i(5):int>
+-- !query output
+10
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11j(s STRING) RETURN UPPER(s)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11j('hello')
+-- !query schema
+struct<spark_catalog.default.foo1_11j(hello):string>
+-- !query output
+HELLO
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11k(a INT, b STRING) RETURN CONCAT(CAST(a AS
STRING), '_', b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1_11k(123, 'test')
+-- !query schema
+struct<spark_catalog.default.foo1_11k(123, test):string>
+-- !query output
+123_test
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11l() RETURNS TABLE RETURN SELECT 1 as id,
'hello' as name
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1_11l()
+-- !query schema
+struct<id:int,name:string>
+-- !query output
+1 hello
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11m(a INT, b STRING) RETURNS TABLE RETURN
SELECT a * 2 as doubled, UPPER(b) as upper_name
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1_11m(5, 'world')
+-- !query schema
+struct<doubled:int,upper_name:string>
+-- !query output
+10 WORLD
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11n(arr ARRAY<INT>) RETURNS TABLE RETURN
SELECT size(arr) as array_size, arr[0] as first_element
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1_11n(ARRAY(1, 2, 3))
+-- !query schema
+struct<array_size:int,first_element:int>
+-- !query output
+3 1
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1_11o(id INT, name STRING) RETURNS TABLE RETURN
SELECT STRUCT(id, name) as person_info, id + 100 as modified_id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1_11o(1, 'Alice')
+-- !query schema
+struct<person_info:struct<id:int,name:string>,modified_id:int>
+-- !query output
+{"id":1,"name":"Alice"} 101
+
+
-- !query
CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]