This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a47e2d1c07cc [SPARK-55907][SQL] Fix incorrect error positions for 
invalid data types in CREATE FUNCTION
a47e2d1c07cc is described below

commit a47e2d1c07cc01a900d02bfd240dc7926521f62e
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Mar 10 22:08:34 2026 -0700

    [SPARK-55907][SQL] Fix incorrect error positions for invalid data types in 
CREATE FUNCTION
    
    ### What changes were proposed in this pull request?
    
    In CREATE FUNCTION statements, data type errors (e.g. STRUCT without <>) in 
parameters or return types were reported with incorrect line/position 
information. This happened because data types were not validated during the 
initial AST visit of visitCreateUserDefinedFunction—instead, parameter and 
return type text was captured as raw strings via source() and only parsed later 
in a separate context, losing the original position information.
    
    For example, given this multi-line SQL:
    ```
      CREATE OR REPLACE FUNCTION error_log_udf_v2(
           log_struct STRUCT<level STRING, message STRING>,
          request_vars_struct STRUCT
      )
      RETURNS STRING
        RETURN CONCAT(
             'Error: ', log_struct.level, ' ', log_struct.message, ' ', 
request_vars_struct
         )
    ```
    The incomplete STRUCT (missing <...>) is on line 3 at position 126. Before 
this fix, the error context pointed to an incorrect location (line 2), making 
it difficult for users to find the actual problem.
    
    This PR eagerly validates data types by calling typedVisit[DataType] on 
each parameter's and return type's dataType node during the initial 
`visitCreateUserDefinedFunction` visit. This ensures errors like 
INCOMPLETE_TYPE_DEFINITION.STRUCT are reported with correct positions relative 
to the full SQL statement.
    
    ### Why are the changes needed?
    
      For multi-line CREATE FUNCTION statements with invalid data types, the 
error context (fragment position, line number) pointed to wrong
      locations, making it difficult for users to locate the actual error in 
their SQL.
    
    ### Does this PR introduce any user-facing change?
    
      Yes. Error messages for invalid data types in CREATE FUNCTION parameters 
and return types now report correct positions.
    
    ### How was this patch tested?
    
      Added new tests in QueryParsingErrorsSuite covering error positions for 
invalid data types in CREATE FUNCTION parameters, return types, and return 
params. All existing QueryParsingErrorsSuite tests (47) and SparkSqlParserSuite 
tests (45) pass with no regressions.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
      Yes, Opus 4.6
    
    Closes #54710 from gengliangwang/fixCreateFunction.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../spark/sql/execution/SparkSqlParser.scala       | 37 ++++++++++++++-----
 .../spark/sql/errors/QueryParsingErrorsSuite.scala | 42 ++++++++++++++++++++++
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index b024c6d1f0aa..6c19f53d2dc4 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
-import org.apache.spark.sql.types.StringType
+import org.apache.spark.sql.types.{DataType, StringType}
 import org.apache.spark.util.Utils.getUriBuilder
 
 /**
@@ -896,21 +896,42 @@ class SparkSqlAstBuilder extends AstBuilder {
         throw 
QueryParsingErrors.createFuncWithBothIfNotExistsAndReplaceError(ctx)
       }
 
-      // Reject invalid options
+      // Reject invalid options and validate parameter data types eagerly so 
that errors
+      // are reported with correct line numbers relative to the full SQL 
statement.
       for {
         parameters <- Option(ctx.parameters)
         colDefinition <- parameters.colDefinition().asScala
-        option <- colDefinition.colDefinitionOption().asScala
       } {
-        if (option.generationExpression() != null) {
-          throw 
QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
-        }
-        if (option.columnConstraintDefinition() != null) {
-          throw 
QueryParsingErrors.createFuncWithConstraintError(ctx.parameters)
+        // Trigger data type validation now (while the original parse tree 
positions are
+        // available) so that any type errors (e.g. STRUCT without <>) report 
the correct
+        // line/position. The result is unused; this call is purely for its 
side effect of
+        // throwing a parse exception with accurate location information.
+        typedVisit[DataType](colDefinition.dataType())
+        for (option <- colDefinition.colDefinitionOption().asScala) {
+          if (option.generationExpression() != null) {
+            throw 
QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
+          }
+          if (option.columnConstraintDefinition() != null) {
+            throw 
QueryParsingErrors.createFuncWithConstraintError(ctx.parameters)
+          }
         }
       }
 
       val inputParamText = Option(ctx.parameters).map(source)
+      // Validate return type eagerly for the same reason as parameter data 
types above:
+      // trigger type errors now so they report correct positions.
+      // Skip validation when the return type is TABLE (for table-valued 
functions):
+      // "RETURNS TABLE" or "RETURNS TABLE(...)" is not a real data type to 
validate.
+      Option(ctx.dataType).foreach { dt =>
+        if (!source(dt).equalsIgnoreCase("table")) {
+          typedVisit[DataType](dt)
+        }
+      }
+      Option(ctx.returnParams).foreach { params =>
+        params.colType().asScala.foreach { colType =>
+          typedVisit[DataType](colType.dataType())
+        }
+      }
       val returnTypeText: String =
         if (ctx.RETURNS != null &&
           (Option(ctx.dataType).nonEmpty || 
Option(ctx.returnParams).nonEmpty)) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
index 35dd1d15ef71..a9104eb2ec3e 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
@@ -672,6 +672,48 @@ class QueryParsingErrorsSuite extends QueryTest with 
SharedSparkSession with SQL
       context = ExpectedContext(fragment = "struct", start = 30, stop = 35))
   }
 
+  test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE 
FUNCTION parameter") {
+    // The incomplete STRUCT is on line 3. The error should reference its 
position, not line 2.
+    val sqlText =
+      """CREATE OR REPLACE FUNCTION error_log_udf_v2(
+        |        log_struct STRUCT<level STRING, message STRING>,
+        |    request_vars_struct STRUCT
+        |)
+        |RETURNS STRING
+        |  RETURN CONCAT(
+        |       'Error: ', log_struct.level, ' ', log_struct.message, ' ', 
request_vars_struct
+        |   )""".stripMargin
+    checkError(
+      exception = parseException(sqlText),
+      condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 126, stop = 131))
+  }
+
+  test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE 
FUNCTION return type") {
+    val sqlText =
+      """CREATE OR REPLACE FUNCTION my_func(x INT)
+        |RETURNS STRUCT
+        |  RETURN x""".stripMargin
+    checkError(
+      exception = parseException(sqlText),
+      condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 50, stop = 55))
+  }
+
+  test("INCOMPLETE_TYPE_DEFINITION: error position for multi-line CREATE 
FUNCTION return params") {
+    val sqlText =
+      """CREATE OR REPLACE FUNCTION my_func(x INT)
+        |RETURNS TABLE(result STRUCT)
+        |  RETURN SELECT x""".stripMargin
+    checkError(
+      exception = parseException(sqlText),
+      condition = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 63, stop = 68))
+  }
+
   test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") {
     // Cast simple map without specifying element type
     checkError(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to