This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new ccfb64b501a4 [SPARK-51439][SQL] Support SQL UDF with DEFAULT argument
ccfb64b501a4 is described below
commit ccfb64b501a415bca927dcb8ae1e60d517a4933e
Author: Haoyu Weng <[email protected]>
AuthorDate: Thu Apr 24 08:01:36 2025 +0800
[SPARK-51439][SQL] Support SQL UDF with DEFAULT argument
Continuing allisonwang-db's work on #50373 and #49471.
This PR adds support for DEFAULT arguments in SQL UDFs. Examples:
```sql
CREATE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a;
SELECT foo1d1(); -- 10
SELECT foo1d1(20); -- 20
CREATE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT)
RETURN SELECT a, b;
SELECT * FROM foo1d6(5); -- 5, 7
SELECT * FROM foo1d6(5, 2); -- 5, 2
```
See sql-udf.sql for more valid and invalid examples.
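Defaults are validated when the function is created. Two invalid cases, adapted
from the new tests in this patch:
```sql
-- Parameters with a DEFAULT must come after parameters without one:
-- [USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION]
CREATE FUNCTION foo1d2(a INT DEFAULT 5, b INT) RETURNS INT RETURN a + b;
-- A subquery is not a valid default expression:
-- [USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION]
CREATE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES (1) AS T(c1)))
RETURNS INT RETURN a;
```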
Why are the changes needed? To support default arguments in SQL UDFs.
Does this PR introduce any user-facing change? Yes: SQL UDFs now support
DEFAULT arguments.
A side effect of the grammar change is that some invalid function parameter
definitions are no longer rejected by the grammar itself, but by the parser
logic instead, which changes the error they produce.
Examples:
```sql
-- multiple COMMENT or multiple NOT NULL
CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world')
RETURNS INT RETURN a;
-- before:
[PARSE_SYNTAX_ERROR] Syntax error at or near 'COMMENT'. SQLSTATE: 42601
== SQL (line 2, position 1) ==
CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world')
RETURNS INT RETURN a;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- after:
[CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE] CREATE TABLE column a specifies
descriptor "COMMENT" more than once, which is invalid. SQLSTATE: 42710
== SQL (line 1, position 1) ==
CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world')...
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
```
```sql
-- GENERATED ALWAYS AS
CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT
RETURN a;
-- before:
[PARSE_SYNTAX_ERROR] Syntax error at or near 'GENERATED'. SQLSTATE: 42601
== SQL (line 2, position 1) ==
CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT
RETURN a;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- after:
[INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS]
Invalid SQL syntax: CREATE FUNCTION with generated columns as parameters is not
allowed. SQLSTATE: 42000
== SQL (line 2, position 1) ==
CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT
RETURN a;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
```
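`NOT NULL` on routine parameters remains disallowed; it is rejected at analysis
time, as covered by the new `foo1e*` cases in `sql-udf.sql`:
```sql
CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1;
-- [USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS] SQLSTATE: 42601
```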
This doesn't change the behavior of existing valid SQL.
How was this patch tested? End-to-end regression tests in `sql-udf.sql` and
simple tests in `SQLFunctionSuite`.
Was this patch authored or co-authored using generative AI tooling? No.
Closes #50408 from wengh/sql-udf-default.
Lead-authored-by: Haoyu Weng <[email protected]>
Co-authored-by: Allison Wang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 10 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 4 +
.../spark/sql/catalyst/util/SparkParserUtils.scala | 8 +
.../spark/sql/errors/QueryParsingErrors.scala | 10 +
.../org/apache/spark/sql/types/StructField.scala | 13 +-
.../org/apache/spark/sql/types/StructType.scala | 1 +
.../spark/sql/catalyst/catalog/SQLFunction.scala | 2 +-
.../sql/catalyst/catalog/UserDefinedFunction.scala | 5 +
.../sql/catalyst/parser/AbstractSqlParser.scala | 8 +
.../spark/sql/catalyst/parser/AstBuilder.scala | 48 ++-
.../sql/catalyst/parser/ParserInterface.scala | 7 +
.../spark/sql/catalyst/parser/ParserUtils.scala | 6 -
.../SparkConnectWithSessionExtensionSuite.scala | 3 +
.../spark/sql/execution/SparkSqlParser.scala | 11 +
.../command/CreateSQLFunctionCommand.scala | 4 +-
.../sql-tests/analyzer-results/sql-udf.sql.out | 357 ++++++++++++++++++-
.../test/resources/sql-tests/inputs/sql-udf.sql | 103 +++++-
.../resources/sql-tests/results/sql-udf.sql.out | 393 ++++++++++++++++++++-
.../spark/sql/SparkSessionExtensionSuite.scala | 4 +
.../spark/sql/execution/SQLFunctionSuite.scala | 13 +
20 files changed, 958 insertions(+), 52 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 47c1151d3c86..334b95122be6 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3366,6 +3366,16 @@
"ANALYZE TABLE(S) ... COMPUTE STATISTICS ... <ctx> must be either
NOSCAN or empty."
]
},
+ "CREATE_FUNC_WITH_COLUMN_CONSTRAINTS" : {
+ "message" : [
+ "CREATE FUNCTION with constraints on parameters is not allowed."
+ ]
+ },
+ "CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS" : {
+ "message" : [
+ "CREATE FUNCTION with generated columns as parameters is not
allowed."
+ ]
+ },
"CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE" : {
"message" : [
"Cannot create a routine with both IF NOT EXISTS and REPLACE
specified."
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 59a0b1ce7a3c..38c88443be2a 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -177,6 +177,10 @@ singleTableSchema
: colTypeList EOF
;
+singleRoutineParamList
+ : colDefinitionList EOF
+ ;
+
statement
: query
#statementDefault
| executeImmediate
#visitExecuteImmediate
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
index 01ee89908570..9c9e623e0339 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
@@ -127,6 +127,14 @@ trait SparkParserUtils {
}
}
+ /** Get the code that creates the given node. */
+ def source(ctx: ParserRuleContext): String = {
+ // Note: `ctx.getText` returns a string without spaces, so we need to
+ // get the text from the underlying char stream instead.
+ val stream = ctx.getStart.getInputStream
+ stream.getText(Interval.of(ctx.getStart.getStartIndex,
ctx.getStop.getStopIndex))
+ }
+
/** Convert a string token into a string. */
def string(token: Token): String = unescapeSQLString(token.getText)
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 0bd9f3801498..5ea96b1240c1 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -656,6 +656,16 @@ private[sql] object QueryParsingErrors extends
DataTypeErrorsBase {
ctx)
}
+ def createFuncWithGeneratedColumnsError(ctx: ParserRuleContext): Throwable =
{
+ new ParseException(
+ errorClass =
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+ ctx)
+ }
+
+ def createFuncWithConstraintError(ctx: ParserRuleContext): Throwable = {
+ new ParseException(errorClass =
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS", ctx)
+ }
+
def defineTempFuncWithIfNotExistsError(ctx: ParserRuleContext): Throwable = {
new ParseException(errorClass =
"INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_IF_NOT_EXISTS", ctx)
}
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
index a0e08745d8af..60362ec46f53 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -150,10 +150,13 @@ case class StructField(
/**
* Return the default value of this StructField. This is used for storing
the default value of a
* function parameter.
+ *
+ * It is present when the field represents a function parameter with a
default value, such as
+ * `CREATE FUNCTION f(arg INT DEFAULT 42) RETURN ...`.
*/
private[sql] def getDefault(): Option[String] = {
- if (metadata.contains("default")) {
- Option(metadata.getString("default"))
+ if (metadata.contains(StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY)) {
+ Option(metadata.getString(StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY))
} else {
None
}
@@ -183,6 +186,9 @@ case class StructField(
/**
* Return the current default value of this StructField.
+ *
+ * It is present only when the field represents a table column with a
default value, such as:
+ * `ALTER TABLE t ALTER COLUMN c SET DEFAULT 42`.
*/
def getCurrentDefaultValue(): Option[String] = {
if (metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)) {
@@ -214,7 +220,8 @@ case class StructField(
}
}
- private def getDDLDefault = getCurrentDefaultValue()
+ private def getDDLDefault = getDefault()
+ .orElse(getCurrentDefaultValue())
.map(" DEFAULT " + _)
.getOrElse("")
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
index cc95d8ee94b0..4c49d3a58f4f 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -521,6 +521,7 @@ case class StructType(fields: Array[StructField]) extends
DataType with Seq[Stru
*/
@Stable
object StructType extends AbstractDataType {
+ private[sql] val SQL_FUNCTION_DEFAULT_METADATA_KEY = "default"
override private[sql] def defaultConcreteType: DataType = new StructType
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
index 923373c1856a..f2fd3b90f646 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
@@ -207,7 +207,7 @@ object SQLFunction {
val returnType = parseReturnTypeText(props(RETURN_TYPE), isTableFunc,
parser)
SQLFunction(
name = function.identifier,
- inputParam = props.get(INPUT_PARAM).map(parseTableSchema(_, parser)),
+ inputParam = props.get(INPUT_PARAM).map(parseRoutineParam(_, parser)),
returnType = returnType.get,
exprText = props.get(EXPRESSION),
queryText = props.get(QUERY),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
index a76ca7b15c27..8ed241468352 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
@@ -86,6 +86,11 @@ object UserDefinedFunction {
// The default Hive Metastore SQL schema length for function resource uri.
private val HIVE_FUNCTION_RESOURCE_URI_LENGTH_THRESHOLD: Int = 4000
+ def parseRoutineParam(text: String, parser: ParserInterface): StructType = {
+ val parsed = parser.parseRoutineParam(text)
+ CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType]
+ }
+
def parseTableSchema(text: String, parser: ParserInterface): StructType = {
val parsed = parser.parseTableSchema(text)
CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType]
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
index c17409a68c96..216136d8a7c8 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
@@ -25,6 +25,7 @@ import
org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, Logic
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.errors.QueryParsingErrors
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.StructType
/**
* Base class for all ANTLR4 [[ParserInterface]] implementations.
@@ -102,6 +103,13 @@ abstract class AbstractSqlParser extends AbstractParser
with ParserInterface {
}
}
+ override def parseRoutineParam(sqlText: String): StructType = parse(sqlText)
{ parser =>
+ val ctx = parser.singleRoutineParamList()
+ withErrorHandling(ctx, Some(sqlText)) {
+ astBuilder.visitSingleRoutineParamList(ctx)
+ }
+ }
+
def withErrorHandling[T](ctx: ParserRuleContext, sqlText:
Option[String])(toResult: => T): T = {
withOrigin(ctx, sqlText) {
try {
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5b66a18aa616..3351644363b8 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -25,7 +25,6 @@ import scala.jdk.CollectionConverters._
import scala.util.{Left, Right}
import org.antlr.v4.runtime.{ParserRuleContext, RuleContext, Token}
-import org.antlr.v4.runtime.misc.Interval
import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
import org.apache.spark.{SparkArithmeticException, SparkException,
SparkIllegalArgumentException, SparkThrowable, SparkThrowableHelper}
@@ -117,6 +116,21 @@ class AstBuilder extends DataTypeAstBuilder
}
}
+ /**
+ * Retrieves the original input text for a given parser context, preserving
all whitespace and
+ * formatting.
+ *
+ * ANTLR's default getText method removes whitespace because lexer rules
typically skip it.
+ * This utility method extracts the exact text from the original input
stream, using token
+ * indices.
+ *
+ * @param ctx The parser context to retrieve original text from.
+ * @return The original input text, including all whitespaces and formatting.
+ */
+ private def getOriginalText(ctx: ParserRuleContext): String = {
+ SparkParserUtils.source(ctx)
+ }
+
/**
* Override the default behavior for all visit methods. This will only
return a non-null result
* when the context has only one child. This is done because there is no
generic method to
@@ -3848,6 +3862,29 @@ class AstBuilder extends DataTypeAstBuilder
* DataType parsing
*
********************************************************************************************
*/
+ override def visitSingleRoutineParamList(
+ ctx: SingleRoutineParamListContext): StructType = withOrigin(ctx) {
+ val cols = visitColDefinitionList(ctx.colDefinitionList())
+ // Generated columns should have been rejected by the parser.
+ for (col <- cols) {
+ assert(col.generationExpression.isEmpty)
+ assert(col.identityColumnSpec.isEmpty)
+ }
+ // Build fields from the columns, converting comments and default values
+ val fields = for (col <- cols) yield {
+ val metadataBuilder = new MetadataBuilder().withMetadata(col.metadata)
+ col.comment.foreach { c =>
+ metadataBuilder.putString("comment", c)
+ }
+ col.defaultValue.foreach { default =>
+ metadataBuilder.putString(
+ StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY, default.originalSQL)
+ }
+ StructField(col.name, col.dataType, col.nullable,
metadataBuilder.build())
+ }
+ StructType(fields.toArray)
+ }
+
/**
* Create top level table schema.
*/
@@ -3950,14 +3987,7 @@ class AstBuilder extends DataTypeAstBuilder
if (expr.containsPattern(PARAMETER)) {
throw QueryParsingErrors.parameterMarkerNotAllowed(place, expr.origin)
}
- // Extract the raw expression text so that we can save the user provided
text. We don't
- // use `Expression.sql` to avoid storing incorrect text caused by bugs in
any expression's
- // `sql` method. Note: `exprCtx.getText` returns a string without spaces,
so we need to
- // get the text from the underlying char stream instead.
- val start = exprCtx.getStart.getStartIndex
- val end = exprCtx.getStop.getStopIndex
- val originalSQL = exprCtx.getStart.getInputStream.getText(new
Interval(start, end))
- DefaultValueExpression(expr, originalSQL)
+ DefaultValueExpression(expr, getOriginalText(exprCtx))
}
/**
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
index 3aec1dd43113..f549f440596e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -21,6 +21,7 @@ import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.types.StructType
/**
* Interface for a parser.
@@ -62,4 +63,10 @@ trait ParserInterface extends DataTypeParserInterface {
*/
@throws[ParseException]("Text cannot be parsed to a LogicalPlan")
def parseQuery(sqlText: String): LogicalPlan
+
+ /**
+ * Parse a string to a [[StructType]] as routine parameters, handling
default values and comments.
+ */
+ @throws[ParseException]("Text cannot be parsed to routine parameters")
+ def parseRoutineParam(sqlText: String): StructType
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 4377f6b5bc0c..fe5bdcc00d30 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -62,12 +62,6 @@ object ParserUtils extends SparkParserUtils {
}
}
- /** Get the code that creates the given node. */
- def source(ctx: ParserRuleContext): String = {
- val stream = ctx.getStart.getInputStream
- stream.getText(Interval.of(ctx.getStart.getStartIndex,
ctx.getStop.getStopIndex))
- }
-
/** Get all the text which comes after the given rule. */
def remainder(ctx: ParserRuleContext): String = remainder(ctx.getStop)
diff --git
a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
index 63d623cd2779..82c8192fe070 100644
---
a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
+++
b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
@@ -55,6 +55,9 @@ class SparkConnectWithSessionExtensionSuite extends
SparkFunSuite {
override def parseQuery(sqlText: String): LogicalPlan =
delegate.parseQuery(sqlText)
+
+ override def parseRoutineParam(sqlText: String): StructType =
+ delegate.parseRoutineParam(sqlText)
}
test("Parse table name with test parser") {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8859b7b421b3..b71a8d9175f5 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -689,6 +689,17 @@ class SparkSqlAstBuilder extends AstBuilder {
throw
QueryParsingErrors.createFuncWithBothIfNotExistsAndReplaceError(ctx)
}
+ // Reject invalid options
+ for {
+ parameters <- Option(ctx.parameters)
+ colDefinition <- parameters.colDefinition().asScala
+ option <- colDefinition.colDefinitionOption().asScala
+ } {
+ if (option.generationExpression() != null) {
+ throw
QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
+ }
+ }
+
val inputParamText = Option(ctx.parameters).map(source)
val returnTypeText: String =
if (ctx.RETURNS != null &&
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
index fe4e6f121f57..09b2c8697075 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
@@ -21,7 +21,7 @@ import org.apache.spark.SparkException
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{Analyzer, SQLFunctionNode,
UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation}
-import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, SQLFunction,
UserDefinedFunctionErrors}
+import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, SQLFunction,
UserDefinedFunction, UserDefinedFunctionErrors}
import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Generator,
LateralSubquery, Literal, ScalarSubquery, SubqueryExpression, WindowExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.plans.Inner
@@ -70,7 +70,7 @@ case class CreateSQLFunctionCommand(
val catalog = sparkSession.sessionState.catalog
val conf = sparkSession.sessionState.conf
- val inputParam = inputParamText.map(parser.parseTableSchema)
+ val inputParam =
inputParamText.map(UserDefinedFunction.parseRoutineParam(_, parser))
val returnType = parseReturnTypeText(returnTypeText, isTableFunc, parser)
val function = SQLFunction(
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
index 8a110190bb02..76252a4db1b5 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
@@ -224,41 +224,370 @@ org.apache.spark.sql.AnalysisException
-- !query
CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a
-- !query analysis
-org.apache.spark.sql.catalyst.parser.ParseException
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT NULL,
INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(null as int) as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT 10, INT,
a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(10 as int) as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT
length(substr(current_database(), 1, 1))) RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT
length(substr(current_database(), 1, 1)), INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(length(substr(current_schema(),
1, 1)) as int) as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' ||
length(substr(current_database(), 1, 1)))
+ RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT '5' ||
length(substr(current_database(), 1, 1)), INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS
spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(concat(5,
cast(length(substr(current_schema(), 1, 1)) as string)) as int) as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT
RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT
RAND()::INT, INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+ RETURNS INT RETURN a
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "PARSE_SYNTAX_ERROR",
- "sqlState" : "42601",
+ "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+ "sqlState" : "42K09",
"messageParameters" : {
- "error" : "'DEFAULT'",
- "hint" : ""
+ "sqlExpr" : "\"array(55, 17)\"",
+ "srcType" : "\"ARRAY<INT>\"",
+ "targetType" : "\"INT\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 1,
- "stopIndex" : 74,
- "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL)
RETURNS INT RETURN a"
+ "stopIndex" : 85,
+ "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55,
17))\n RETURNS INT RETURN a"
} ]
}
-- !query
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES
(1) AS T(c1)))
+ RETURNS INT RETURN a
-- !query analysis
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
{
- "errorClass" : "PARSE_SYNTAX_ERROR",
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION",
"sqlState" : "42601",
"messageParameters" : {
- "error" : "'DEFAULT'",
- "hint" : ""
+ "functionName" : "foo1d1",
+ "parameterName" : "a"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d2, a INT, b INT DEFAULT 7,
c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test', STRING, a || ' ' || b || ' '
|| c || ' ' || d, false, false, false, true
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1)
+-- !query analysis
+Project [spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS
spark_catalog.default.foo1d2(1, 2, 3, 4)#x, spark_catalog.default.foo1d2(a#x,
b#x, c#x, d#x) AS spark_catalog.default.foo1d2(1, 2, 3)#x,
spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS
spark_catalog.default.foo1d2(1, 2)#x, spark_catalog.default.foo1d2(a#x, b#x,
c#x, d#x) AS spark_catalog.default.foo1d2(1)#x]
++- Project [cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(3 as int) AS
c#x, cast(4 as int) AS d#x, cast(1 as int) AS a#x, cast(2 as int) AS b#x,
cast(3 as int) AS c#x, cast(cast(9 as int) as int) AS d#x, cast(1 as int) AS
a#x, cast(2 as int) AS b#x, cast(cast(8 as int) as int) AS c#x, cast(cast(9 as
int) as int) AS d#x, cast(1 as int) AS a#x, cast(cast(7 as int) as int) AS b#x,
cast(cast(8 as int) as int) AS c#x, cast(cast(9 as int) as int) AS d#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT foo1d2()
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "4",
+ "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "foo1d2()"
+ } ]
+}
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4, 5)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "5",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "4",
+ "functionName" : "`spark_catalog`.`default`.`foo1d2`"
},
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 28,
+ "fragment" : "foo1d2(1, 2, 3, 4, 5)"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" :
"USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "functionName" : "foo1d2",
+ "nextParameterName" : "b",
+ "parameterName" : "a"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" :
"USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "functionName" : "foo1d2",
+ "nextParameterName" : "d",
+ "parameterName" : "c"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello')
RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand foo1d3, a INT DEFAULT 7 COMMENT 'hello', INT, a,
false, true, false, true
+
+
+-- !query
+SELECT foo1d3(5), foo1d3()
+-- !query analysis
+Project [foo1d3(a#x) AS foo1d3(5)#x, foo1d3(a#x) AS foo1d3()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(7 as int) as int) AS a#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a
+ b
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`a`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 1,
+ "fragment" : "a"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a
+ b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d4, a INT, b INT DEFAULT 3,
INT, a + b, false, false, false, true
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT
RETURN a + b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d5, a INT, b INT DEFAULT
foo1d4(6), INT, a + b, false, false, false, true
+
+
+-- !query
+SELECT foo1d5(10), foo1d5(10, 2)
+-- !query analysis
+Project [spark_catalog.default.foo1d5(a#x, b#x) AS
spark_catalog.default.foo1d5(10)#x, spark_catalog.default.foo1d5(a#x, b#x) AS
spark_catalog.default.foo1d5(10, 2)#x]
++- Project [cast(10 as int) AS a#x,
cast(cast(spark_catalog.default.foo1d4(a#x, b#x) as int) as int) AS b#x,
cast(10 as int) AS a#x, cast(2 as int) AS b#x]
+ +- Project [cast(6 as int) AS a#x, cast(cast(3 as int) as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a +
foo1d4(b)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d5, a INT, b INT, INT, a +
foo1d4(b), false, false, false, true
+
+
+-- !query
+SELECT foo1d5(10, 2)
+-- !query analysis
+Project [spark_catalog.default.foo1d5(a#x, b#x) AS
spark_catalog.default.foo1d5(10, 2)#x]
++- Project [a#x, b#x, cast(b#x as int) AS a#x, cast(cast(3 as int) as int) AS
b#x]
+ +- Project [cast(10 as int) AS a#x, cast(2 as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d6, a INT, b INT DEFAULT 7,
a INT, b INT, SELECT a, b, true, false, false, true
+
+
+-- !query
+SELECT * FROM foo1d6(5)
+-- !query analysis
+Project [a#x, b#x]
++- SQLFunctionNode spark_catalog.default.foo1d6
+ +- SubqueryAlias foo1d6
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+ +- Project [cast(5 as int) AS a#x, cast(7 as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT * FROM foo1d6(5, 2)
+-- !query analysis
+Project [a#x, b#x]
++- SQLFunctionNode spark_catalog.default.foo1d6
+ +- SubqueryAlias foo1d6
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+ +- Project [cast(5 as int) AS a#x, cast(2 as int) AS b#x]
+ +- OneRowRelation
+
+
+-- !query
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT NOT NULL, y INT"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN
SELECT 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT, y INT NOT NULL"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN
SELECT 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT NOT NULL"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT
RETURN y + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" :
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+ "sqlState" : "42000",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 90,
+ "fragment" : "CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x +
10)) RETURNS INT RETURN y + 1"
+ } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT
RETURN id + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" :
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+ "sqlState" : "42000",
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 1,
- "stopIndex" : 104,
- "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7)
RETURNS TABLE(a INT, b INT) RETURN SELECT a, b"
+ "stopIndex" : 91,
+ "fragment" : "CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS
IDENTITY) RETURNS BIGINT RETURN id + 1"
} ]
}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
index 989fb7c4fae3..f71cca3fe5d3 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
@@ -42,12 +42,109 @@ CREATE FUNCTION foo1c2(a INT, b INT, thisisaduplicate INT,
c INT, d INT, e INT,
RETURNS TABLE (a INT) RETURN SELECT 1;
-- 1.1.d DEFAULT parameters
--- TODO(SPARK-51439): Support default parameters in SQL UDF
--- DEFAULT in scalar function
+-- A NULL default
CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a;
+-- Expect 5, NULL
+SELECT foo1d1(5), foo1d1();
+
+-- A literal default
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a;
+
+-- Expect 5, 10
+SELECT foo1d1(5), foo1d1();
+
+-- A constant expression
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT
length(substr(current_database(), 1, 1))) RETURNS INT RETURN a;
+
+-- Expect 5, 1
+SELECT foo1d1(5), foo1d1();
+
+-- An expression that needs a cast
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' ||
length(substr(current_database(), 1, 1)))
+ RETURNS INT RETURN a;
+
+-- Expect 5, 51
+SELECT foo1d1(5), foo1d1();
+
+-- A non deterministic default
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT
RETURN a;
+
+-- Expect 5, 0
+SELECT foo1d1(5), foo1d1();
+
+-- Cannot cast
+-- Expect error
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+ RETURNS INT RETURN a;
+
+-- A subquery
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES
(1) AS T(c1)))
+ RETURNS INT RETURN a;
+
+-- Multiple parameters
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+-- Expect: (1 2 3 4), (1 2 3 9), (1 2 8 9), (1 7 8 9)
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1);
+
+-- Expect error a has no default
+SELECT foo1d2();
+
+-- Expect error, too many parameters
+SELECT foo1d2(1, 2, 3, 4, 5);
+
+-- Sparse default, expect error
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+-- Temporary function
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello')
RETURNS INT RETURN a;
+
+-- Expect 5, 7
+SELECT foo1d3(5), foo1d3();
+
+-- Dependent default
+-- Expect error
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a
+ b;
+
+-- Defaults with SQL UDF
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a
+ b;
+
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT
RETURN a + b;
+
+-- Expect 19, 12
+SELECT foo1d5(10), foo1d5(10, 2);
+
+-- Function invocation with default in SQL UDF
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a +
foo1d4(b);
+
+-- Expect 15
+SELECT foo1d5(10, 2);
+
-- DEFAULT in table function
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b;
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b;
+
+-- Expect (5, 7)
+SELECT * FROM foo1d6(5);
+
+-- Expect (5, 2)
+SELECT * FROM foo1d6(5, 2);
+
+-- 1.1.e NOT NULL
+-- Expect failure
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1;
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN
SELECT 1;
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN
SELECT 1;
+
+-- 1.1.f GENERATED ALWAYS AS
+-- Expect failure
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT
RETURN y + 1;
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT
RETURN id + 1;
-------------------------------
-- 2. Scalar SQL UDF
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
index 184489190d0e..b15a6ec2551d 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
@@ -183,43 +183,408 @@ CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL)
RETURNS INT RETURN a
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5 NULL
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5 10
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT
length(substr(current_database(), 1, 1))) RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5 1
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' ||
length(substr(current_database(), 1, 1)))
+ RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5 51
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT
RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5 0
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+ RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "PARSE_SYNTAX_ERROR",
- "sqlState" : "42601",
+ "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+ "sqlState" : "42K09",
"messageParameters" : {
- "error" : "'DEFAULT'",
- "hint" : ""
+ "sqlExpr" : "\"array(55, 17)\"",
+ "srcType" : "\"ARRAY<INT>\"",
+ "targetType" : "\"INT\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 1,
- "stopIndex" : 74,
- "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL)
RETURNS INT RETURN a"
+ "stopIndex" : 85,
+ "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55,
17))\n RETURNS INT RETURN a"
} ]
}
-- !query
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES
(1) AS T(c1)))
+ RETURNS INT RETURN a
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "functionName" : "foo1d1",
+ "parameterName" : "a"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1)
+-- !query schema
+struct<spark_catalog.default.foo1d2(1, 2, 3,
4):string,spark_catalog.default.foo1d2(1, 2,
3):string,spark_catalog.default.foo1d2(1,
2):string,spark_catalog.default.foo1d2(1):string>
+-- !query output
+1 2 3 4 1 2 3 9 1 2 8 9 1 7 8 9
+
+
+-- !query
+SELECT foo1d2()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "4",
+ "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "foo1d2()"
+ } ]
+}
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4, 5)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "5",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "4",
+ "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 28,
+ "fragment" : "foo1d2(1, 2, 3, 4, 5)"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d
INT DEFAULT 9 COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" :
"USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "functionName" : "foo1d2",
+ "nextParameterName" : "b",
+ "parameterName" : "a"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d
INT COMMENT 'test')
+ RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
{
- "errorClass" : "PARSE_SYNTAX_ERROR",
+ "errorClass" :
"USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
"sqlState" : "42601",
"messageParameters" : {
- "error" : "'DEFAULT'",
- "hint" : ""
+ "functionName" : "foo1d2",
+ "nextParameterName" : "d",
+ "parameterName" : "c"
+ }
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello')
RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d3(5), foo1d3()
+-- !query schema
+struct<foo1d3(5):int,foo1d3():int>
+-- !query output
+5 7
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a
+ b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`a`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 1,
- "stopIndex" : 104,
- "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7)
RETURNS TABLE(a INT, b INT) RETURN SELECT a, b"
+ "stopIndex" : 1,
+ "fragment" : "a"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a
+ b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT
RETURN a + b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d5(10), foo1d5(10, 2)
+-- !query schema
+struct<spark_catalog.default.foo1d5(10):int,spark_catalog.default.foo1d5(10,
2):int>
+-- !query output
+19 12
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a +
foo1d4(b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d5(10, 2)
+-- !query schema
+struct<spark_catalog.default.foo1d5(10, 2):int>
+-- !query output
+15
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT,
b INT) RETURN SELECT a, b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1d6(5)
+-- !query schema
+struct<a:int,b:int>
+-- !query output
+5 7
+
+
+-- !query
+SELECT * FROM foo1d6(5, 2)
+-- !query schema
+struct<a:int,b:int>
+-- !query output
+5 2
+
+
+-- !query
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT NOT NULL, y INT"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN
SELECT 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT, y INT NOT NULL"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN
SELECT 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "input" : "x INT NOT NULL"
+ }
+}
+
+
+-- !query
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT
RETURN y + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" :
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+ "sqlState" : "42000",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 90,
+ "fragment" : "CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x +
10)) RETURNS INT RETURN y + 1"
+ } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT
RETURN id + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" :
"INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+ "sqlState" : "42000",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 91,
+ "fragment" : "CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS
IDENTITY) RETURNS BIGINT RETURN id + 1"
} ]
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index 8f0a62e210d8..c1c041509c35 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -640,6 +640,10 @@ case class MyParser(spark: SparkSession, delegate:
ParserInterface) extends Pars
override def parseQuery(sqlText: String): LogicalPlan =
delegate.parseQuery(sqlText)
+
+ override def parseRoutineParam(sqlText: String): StructType = {
+ delegate.parseRoutineParam(sqlText)
+ }
}
object MyExtensions {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
index 0878a0cb54d2..721bfe002969 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
@@ -74,4 +74,17 @@ class SQLFunctionSuite extends QueryTest with
SharedSparkSession {
|""".stripMargin), Seq(Row(2), Row(4)))
}
}
+
+ test("SQL scalar function with default value") {
+ withUserDefinedFunction("bar" -> false) {
+ sql(
+ """
+ |CREATE FUNCTION bar(x INT DEFAULT 7)
+ |RETURNS INT
+ |RETURN x + 1
+ |""".stripMargin)
+ checkAnswer(sql("SELECT bar()"), Row(8))
+ checkAnswer(sql("SELECT bar(1)"), Row(2))
+ }
+ }
}