This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d5fad6381014 [SPARK-46074][CONNECT][SCALA] Insufficient details in
error message on UDF failure
d5fad6381014 is described below
commit d5fad63810149a69527706bb16333baee06a4270
Author: Niranjan Jayakar <[email protected]>
AuthorDate: Mon Nov 27 08:47:31 2023 +0900
[SPARK-46074][CONNECT][SCALA] Insufficient details in error message on UDF
failure
### What changes were proposed in this pull request?
Update the error message for `FAILED_EXECUTE_UDF` to include the underlying
error message.
### Why are the changes needed?
The Spark Connect client does not receive the underlying cause for a UDF
failure.
This means that a user needs to go into the driver logs to identify the
cause of the failure.
Update the error message so that the underlying exception's message is
included.
### Does this PR introduce _any_ user-facing change?
Yes. This changes the error message that the user sees when a UDF fails. A
new error parameter (`reason`) is added, but the SQL state and the existing
parameters are unchanged, so this should cause no regressions.
The error message prior to this change:
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 3
in stage 0.0 failed 1 times, most recent failure: Lost task 3.0 in stage 0.0
(TID 3) (192.168.188.21 executor driver): org.apache.spark.SparkException:
[FAILED_EXECUTE_UDF] Failed to execute user defined function (`
(cmd2$Helper$$Lambda$2170/0x000000f001d23000)`: (int) => int). SQLSTATE: 39000
```
Sample of the new error message:
```
org.apache.spark.SparkException: [FAILED_EXECUTE_UDF] User defined function
(` (cmd2$Helper$$Lambda$2422/0x0000007001ec1a10)`: (int) => int) failed due to:
java.lang.NoClassDefFoundError: com/nija/test/MyClass. SQLSTATE: 39000
```
### How was this patch tested?
Tested manually by running a [local connect server] and a
[connect client REPL].
[local connect server]:
https://github.com/apache/spark/blob/master/connector/connect/bin/spark-connect-shell
[connect client REPL]:
https://github.com/apache/spark/blob/master/connector/connect/bin/spark-connect-scala-client
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43983 from nija-at/udf-error-msg.
Authored-by: Niranjan Jayakar <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
common/utils/src/main/resources/error/error-classes.json | 2 +-
docs/sql-error-conditions.md | 2 +-
.../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 3 ++-
.../spark/sql/catalyst/expressions/ScalaUDFSuite.scala | 6 ++++--
.../spark/sql/errors/QueryExecutionErrorsSuite.scala | 6 ++++--
.../org/apache/spark/sql/hive/execution/HiveUDFSuite.scala | 14 ++++++++++++--
6 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 19b70307a1cd..5b70edf249d1 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1067,7 +1067,7 @@
},
"FAILED_EXECUTE_UDF" : {
"message" : [
- "Failed to execute user defined function (<functionName>: (<signature>)
=> <result>)."
+ "User defined function (<functionName>: (<signature>) => <result>)
failed due to: <reason>."
],
"sqlState" : "39000"
},
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index c0f88bffa6e5..71abf10da328 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -643,7 +643,7 @@ Column expression `<expr>` cannot be sorted because its
type `<exprType>` is not
[SQLSTATE:
39000](sql-error-conditions-sqlstates.html#class-39-external-routine-invocation-exception)
-Failed to execute user defined function (`<functionName>`: (`<signature>`) =>
`<result>`).
+User defined function (`<functionName>`: (`<signature>`) => `<result>`) failed
due to: `<reason>`.
### FAILED_FUNCTION_CALL
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 1aa25a51fa9c..24332479f193 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -190,7 +190,8 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase with ExecutionE
messageParameters = Map(
"functionName" -> toSQLId(functionName),
"signature" -> inputTypes,
- "result" -> outputType),
+ "result" -> outputType,
+ "reason" -> e.toString),
cause = e)
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
index 1b40e02aa866..00fc9d462eb6 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala
@@ -50,13 +50,15 @@ class ScalaUDFSuite extends SparkFunSuite with
ExpressionEvalHelper {
Literal.create(null, StringType) :: Nil,
Option(resolvedEncoder[String]()) :: Nil)
+ val pattern = "User defined function .+ failed due to:
java.lang.NullPointerException".r
+
val e1 = intercept[SparkException](udf.eval())
- assert(e1.getMessage.contains("Failed to execute user defined function"))
+ assert(pattern.findFirstIn(e1.getMessage).isDefined)
val e2 = intercept[SparkException] {
checkEvaluationWithUnsafeProjection(udf, null)
}
- assert(e2.getMessage.contains("Failed to execute user defined function"))
+ assert(pattern.findFirstIn(e2.getMessage).isDefined)
}
test("SPARK-22695: ScalaUDF should not use global variables") {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
index a49352cbe508..1e869bfd25aa 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
@@ -431,7 +431,8 @@ class QueryExecutionErrorsSuite
parameters = Map(
"functionName" -> functionNameRegex,
"signature" -> "string, int",
- "result" -> "string"),
+ "result" -> "string",
+ "reason" -> "java.lang.StringIndexOutOfBoundsException: begin 5, end
6, length 5"),
matchPVals = true)
}
@@ -455,7 +456,8 @@ class QueryExecutionErrorsSuite
errorClass = "FAILED_EXECUTE_UDF",
parameters = Map("functionName" -> functionNameRegex,
"signature" -> "string, int",
- "result" -> "string"),
+ "result" -> "string",
+ "reason" -> "java.lang.StringIndexOutOfBoundsException: begin 5, end
6, length 5"),
matchPVals = true)
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 3813071b680c..096b11feb9bc 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -754,7 +754,9 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton
with SQLTestUtils {
"functionName" ->
"`org`.`apache`.`hadoop`.`hive`.`ql`.`udf`.`generic`.`GenericUDFAssertTrue`",
"signature" -> "boolean",
- "result" -> "void"))
+ "result" -> "void",
+ "reason" ->
+ "org.apache.hadoop.hive.ql.metadata.HiveException:
ASSERT_TRUE(): assertion failed."))
}
}
}
@@ -778,6 +780,13 @@ class HiveUDFSuite extends QueryTest with
TestHiveSingleton with SQLTestUtils {
withTable("HiveSimpleUDFTable") {
sql(s"create table HiveSimpleUDFTable as select false as v")
val df = sql("SELECT CodeGenHiveSimpleUDF(v) from HiveSimpleUDFTable")
+
+ val reason = """
+ |org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute
method public
+ |boolean
org.apache.spark.sql.hive.execution.SimpleUDFAssertTrue.evaluate(boolean) with
+ |arguments {false}:ASSERT_TRUE(): assertion failed."""
+ .stripMargin.replaceAll("\n", " ").trim
+
checkError(
exception =
intercept[SparkException](df.collect()).getCause.asInstanceOf[SparkException],
errorClass = "FAILED_EXECUTE_UDF",
@@ -785,7 +794,8 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton
with SQLTestUtils {
"functionName" ->
"`org`.`apache`.`spark`.`sql`.`hive`.`execution`.`SimpleUDFAssertTrue`",
"signature" -> "boolean",
- "result" -> "boolean"
+ "result" -> "boolean",
+ "reason" -> reason
)
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]