This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a47e2d1c07cc [SPARK-55907][SQL] Fix incorrect error positions for
invalid data types in CREATE FUNCTION
a47e2d1c07cc is described below
commit a47e2d1c07cc01a900d02bfd240dc7926521f62e
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Mar 10 22:08:34 2026 -0700
[SPARK-55907][SQL] Fix incorrect error positions for invalid data types in
CREATE FUNCTION
### What changes were proposed in this pull request?
In CREATE FUNCTION statements, data type errors (e.g. STRUCT without <>) in
parameters or return types were reported with incorrect line/position
information. This happened because data types were not validated during the
initial AST visit of visitCreateUserDefinedFunction—instead, parameter and
return type text was captured as raw strings via source() and only parsed later
in a separate context, losing the original position information.
For example, given this multi-line SQL:
```
CREATE OR REPLACE FUNCTION error_log_udf_v2(
log_struct STRUCT<level STRING, message STRING>,
request_vars_struct STRUCT
)
RETURNS STRING
RETURN CONCAT(
'Error: ', log_struct.level, ' ', log_struct.message, ' ',
request_vars_struct
)
```
The incomplete STRUCT (missing <...>) is on line 3 at position 126. Before
this fix, the error context pointed to an incorrect location (line 2), making
it difficult for users to find the actual problem.
This PR eagerly validates data types by calling typedVisit[DataType] on
each parameter's and return type's dataType node during the initial
`visitCreateUserDefinedFunction` visit. This ensures errors like
INCOMPLETE_TYPE_DEFINITION.STRUCT are reported with correct positions relative
to the full SQL statement.
### Why are the changes needed?
For multi-line CREATE FUNCTION statements with invalid data types, the
error context (fragment position, line number) pointed to wrong
locations, making it difficult for users to locate the actual error in
their SQL.
### Does this PR introduce any user-facing change?
Yes. Error messages for invalid data types in CREATE FUNCTION parameters
and return types now report correct positions.
### How was this patch tested?
Added new tests in QueryParsingErrorsSuite covering error positions for
multi-line CREATE FUNCTION statements. All existing QueryParsingErrorsSuite
tests (47) and SparkSqlParserSuite tests (45) pass with no regressions.
### Was this patch authored or co-authored using generative AI tooling?
Yes, Opus 4.6
Closes #54710 from gengliangwang/fixCreateFunction.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
---
.../spark/sql/execution/SparkSqlParser.scala | 37 ++++++++++++++-----
.../spark/sql/errors/QueryParsingErrorsSuite.scala | 42 ++++++++++++++++++++++
2 files changed, 71 insertions(+), 8 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index b024c6d1f0aa..6c19f53d2dc4 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
-import org.apache.spark.sql.types.StringType
+import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.spark.util.Utils.getUriBuilder
/**
@@ -896,21 +896,42 @@ class SparkSqlAstBuilder extends AstBuilder {
throw
QueryParsingErrors.createFuncWithBothIfNotExistsAndReplaceError(ctx)
}
- // Reject invalid options
+ // Reject invalid options and validate parameter data types eagerly so
that errors
+ // are reported with correct line numbers relative to the full SQL
statement.
for {
parameters <- Option(ctx.parameters)
colDefinition <- parameters.colDefinition().asScala
- option <- colDefinition.colDefinitionOption().asScala
} {
- if (option.generationExpression() != null) {
- throw
QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
- }
- if (option.columnConstraintDefinition() != null) {
- throw
QueryParsingErrors.createFuncWithConstraintError(ctx.parameters)
+ // Trigger data type validation now (while the original parse tree
positions are
+ // available) so that any type errors (e.g. STRUCT without <>) report
the correct
+ // line/position. The result is unused; this call is purely for its
side effect of
+ // throwing a parse exception with accurate location information.
+ typedVisit[DataType](colDefinition.dataType())
+ for (option <- colDefinition.colDefinitionOption().asScala) {
+ if (option.generationExpression() != null) {
+ throw
QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
+ }
+ if (option.columnConstraintDefinition() != null) {
+ throw
QueryParsingErrors.createFuncWithConstraintError(ctx.parameters)
+ }
}
}
val inputParamText = Option(ctx.parameters).map(source)
+ // Validate return type eagerly for the same reason as parameter data
types above:
+ // trigger type errors now so they report correct positions.
+ // Skip validation when the return type is TABLE (for table-valued
functions):
+ // "RETURNS TABLE" or "RETURNS TABLE(...)" is not a real data type to
validate.
+ Option(ctx.dataType).foreach { dt =>
+ if (!source(dt).equalsIgnoreCase("table")) {
+ typedVisit[DataType](dt)
+ }
+ }
+ Option(ctx.returnParams).foreach { params =>
+ params.colType().asScala.foreach { colType =>
+ typedVisit[DataType](colType.dataType())
+ }
+ }
val returnTypeText: String =
if (ctx.RETURNS != null &&
(Option(ctx.dataType).nonEmpty ||
Option(ctx.returnParams).nonEmpty)) {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
index 35dd1d15ef71..a9104eb2ec3e 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
@@ -672,6 +672,48 @@ class QueryParsingErrorsSuite extends QueryTest with
SharedSparkSession with SQL
context = ExpectedContext(fragment = "struct", start = 30, stop = 35))
}
+ test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE
FUNCTION parameter") {
+ // The incomplete STRUCT is on line 3. The error should reference its
position, not line 2.
+ val sqlText =
+ """CREATE OR REPLACE FUNCTION error_log_udf_v2(
+ | log_struct STRUCT<level STRING, message STRING>,
+ | request_vars_struct STRUCT
+ |)
+ |RETURNS STRING
+ | RETURN CONCAT(
+ | 'Error: ', log_struct.level, ' ', log_struct.message, ' ',
request_vars_struct
+ | )""".stripMargin
+ checkError(
+ exception = parseException(sqlText),
+ condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 126, stop = 131))
+ }
+
+ test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE
FUNCTION return type") {
+ val sqlText =
+ """CREATE OR REPLACE FUNCTION my_func(x INT)
+ |RETURNS STRUCT
+ | RETURN x""".stripMargin
+ checkError(
+ exception = parseException(sqlText),
+ condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 50, stop = 55))
+ }
+
+ test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE
FUNCTION return params") {
+ val sqlText =
+ """CREATE OR REPLACE FUNCTION my_func(x INT)
+ |RETURNS TABLE(result STRUCT)
+ | RETURN SELECT x""".stripMargin
+ checkError(
+ exception = parseException(sqlText),
+ condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 63, stop = 68))
+ }
+
test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") {
// Cast simple map without specifying element type
checkError(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]