This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d5fad6381014 [SPARK-46074][CONNECT][SCALA] Insufficient details in 
error message on UDF failure
d5fad6381014 is described below

commit d5fad63810149a69527706bb16333baee06a4270
Author: Niranjan Jayakar <[email protected]>
AuthorDate: Mon Nov 27 08:47:31 2023 +0900

    [SPARK-46074][CONNECT][SCALA] Insufficient details in error message on UDF 
failure
    
    ### What changes were proposed in this pull request?
    
    Update the error message for 'FAILED_EXECUTE_UDF' with the underlying error
    message.
    
    ### Why are the changes needed?
    
    The Spark Connect client does not receive the underlying cause for a UDF
    failure. This means that a user needs to go into the driver logs to
    identify the cause of the failure.
    
    Update the error message so that the underlying exception's message is 
included.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. This changes the error message that the user sees when a UDF fails. A 
new error
    parameter is added but the SQL state and existing parameters are unchanged 
and should
    cause no regressions.
    
    The error message prior to this change:
    
    ```
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 
in stage 0.0 failed 1 times, most recent failure: Lost task 3.0 in stage 0.0 
(TID 3) (192.168.188.21 executor driver): org.apache.spark.SparkException: 
[FAILED_EXECUTE_UDF] Failed to execute user defined function (` 
(cmd2$Helper$$Lambda$2170/0x000000f001d23000)`: (int) => int). SQLSTATE: 39000
    ```
    
    Sample of the new error message:
    
    ```
    org.apache.spark.SparkException: [FAILED_EXECUTE_UDF] User defined function 
(` (cmd2$Helper$$Lambda$2422/0x0000007001ec1a10)`: (int) => int) failed due to: 
java.lang.NoClassDefFoundError: com/nija/test/MyClass. SQLSTATE: 39000
    ```
    
    ### How was this patch tested?
    
    Tested manually by running a [local connect server] and [connect client 
REPL]
    
    [local connect server]: 
https://github.com/apache/spark/blob/master/connector/connect/bin/spark-connect-shell
    [connect client REPL]: 
https://github.com/apache/spark/blob/master/connector/connect/bin/spark-connect-scala-client
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43983 from nija-at/udf-error-msg.
    
    Authored-by: Niranjan Jayakar <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 common/utils/src/main/resources/error/error-classes.json   |  2 +-
 docs/sql-error-conditions.md                               |  2 +-
 .../org/apache/spark/sql/errors/QueryExecutionErrors.scala |  3 ++-
 .../spark/sql/catalyst/expressions/ScalaUDFSuite.scala     |  6 ++++--
 .../spark/sql/errors/QueryExecutionErrorsSuite.scala       |  6 ++++--
 .../org/apache/spark/sql/hive/execution/HiveUDFSuite.scala | 14 ++++++++++++--
 6 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json 
b/common/utils/src/main/resources/error/error-classes.json
index 19b70307a1cd..5b70edf249d1 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1067,7 +1067,7 @@
   },
   "FAILED_EXECUTE_UDF" : {
     "message" : [
-      "Failed to execute user defined function (<functionName>: (<signature>) 
=> <result>)."
+      "User defined function (<functionName>: (<signature>) => <result>) 
failed due to: <reason>."
     ],
     "sqlState" : "39000"
   },
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index c0f88bffa6e5..71abf10da328 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -643,7 +643,7 @@ Column expression `<expr>` cannot be sorted because its 
type `<exprType>` is not
 
 [SQLSTATE: 
39000](sql-error-conditions-sqlstates.html#class-39-external-routine-invocation-exception)
 
-Failed to execute user defined function (`<functionName>`: (`<signature>`) => 
`<result>`).
+User defined function (`<functionName>`: (`<signature>`) => `<result>`) failed 
due to: `<reason>`.
 
 ### FAILED_FUNCTION_CALL
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 1aa25a51fa9c..24332479f193 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -190,7 +190,8 @@ private[sql] object QueryExecutionErrors extends 
QueryErrorsBase with ExecutionE
       messageParameters = Map(
         "functionName" -> toSQLId(functionName),
         "signature" -> inputTypes,
-        "result" -> outputType),
+        "result" -> outputType,
+        "reason" -> e.toString),
       cause = e)
   }
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
index 1b40e02aa866..00fc9d462eb6 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
@@ -50,13 +50,15 @@ class ScalaUDFSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       Literal.create(null, StringType) :: Nil,
       Option(resolvedEncoder[String]()) :: Nil)
 
+    val pattern = "User defined function .+ failed due to: 
java.lang.NullPointerException".r
+
     val e1 = intercept[SparkException](udf.eval())
-    assert(e1.getMessage.contains("Failed to execute user defined function"))
+    assert(pattern.findFirstIn(e1.getMessage).isDefined)
 
     val e2 = intercept[SparkException] {
       checkEvaluationWithUnsafeProjection(udf, null)
     }
-    assert(e2.getMessage.contains("Failed to execute user defined function"))
+    assert(pattern.findFirstIn(e2.getMessage).isDefined)
   }
 
   test("SPARK-22695: ScalaUDF should not use global variables") {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
index a49352cbe508..1e869bfd25aa 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
@@ -431,7 +431,8 @@ class QueryExecutionErrorsSuite
       parameters = Map(
         "functionName" -> functionNameRegex,
         "signature" -> "string, int",
-        "result" -> "string"),
+        "result" -> "string",
+        "reason" -> "java.lang.StringIndexOutOfBoundsException: begin 5, end 
6, length 5"),
       matchPVals = true)
   }
 
@@ -455,7 +456,8 @@ class QueryExecutionErrorsSuite
       errorClass = "FAILED_EXECUTE_UDF",
       parameters = Map("functionName" -> functionNameRegex,
         "signature" -> "string, int",
-        "result" -> "string"),
+        "result" -> "string",
+        "reason" -> "java.lang.StringIndexOutOfBoundsException: begin 5, end 
6, length 5"),
       matchPVals = true)
   }
 
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 3813071b680c..096b11feb9bc 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -754,7 +754,9 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton 
with SQLTestUtils {
             "functionName" ->
               
"`org`.`apache`.`hadoop`.`hive`.`ql`.`udf`.`generic`.`GenericUDFAssertTrue`",
             "signature" -> "boolean",
-            "result" -> "void"))
+            "result" -> "void",
+            "reason" ->
+              "org.apache.hadoop.hive.ql.metadata.HiveException: 
ASSERT_TRUE(): assertion failed."))
       }
     }
   }
@@ -778,6 +780,13 @@ class HiveUDFSuite extends QueryTest with 
TestHiveSingleton with SQLTestUtils {
       withTable("HiveSimpleUDFTable") {
         sql(s"create table HiveSimpleUDFTable as select false as v")
         val df = sql("SELECT CodeGenHiveSimpleUDF(v) from HiveSimpleUDFTable")
+
+        val reason = """
+          |org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute 
method public
+          |boolean 
org.apache.spark.sql.hive.execution.SimpleUDFAssertTrue.evaluate(boolean) with
+          |arguments {false}:ASSERT_TRUE(): assertion failed."""
+          .stripMargin.replaceAll("\n", " ").trim
+
         checkError(
           exception = 
intercept[SparkException](df.collect()).getCause.asInstanceOf[SparkException],
           errorClass = "FAILED_EXECUTE_UDF",
@@ -785,7 +794,8 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton 
with SQLTestUtils {
             "functionName" ->
               
"`org`.`apache`.`spark`.`sql`.`hive`.`execution`.`SimpleUDFAssertTrue`",
             "signature" -> "boolean",
-            "result" -> "boolean"
+            "result" -> "boolean",
+            "reason" -> reason
           )
         )
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to