This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8230f16164b1 [SPARK-45317][SQL][CONNECT] Handle null filename in stack
traces of exceptions
8230f16164b1 is described below
commit 8230f16164b1cbd20ca0cb052c28c9fdb8d892d1
Author: Yihong He <[email protected]>
AuthorDate: Tue Sep 26 11:04:14 2023 +0900
[SPARK-45317][SQL][CONNECT] Handle null filename in stack traces of
exceptions
### What changes were proposed in this pull request?
- Handle null filename in stack traces of exceptions
- Change the filename field in protobuf to optional
### Why are the changes needed?
- In Java exceptions, the filename is the only stack-trace field that can be
null, and a null filename may cause a NullPointerException
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- `build/sbt "connect-client-jvm/testOnly *ClientE2ETestSuite"`
- `build/sbt "connect-client-jvm/testOnly *ClientStreamingQuerySuite"`
### Was this patch authored or co-authored using generative AI tooling?
Closes #43103 from heyihong/SPARK-45317.
Authored-by: Yihong He <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../org/apache/spark/sql/ClientE2ETestSuite.scala | 28 ++++++++++++++++++++++
.../src/main/protobuf/spark/connect/base.proto | 2 +-
.../connect/client/GrpcExceptionConverter.scala | 2 +-
.../spark/sql/connect/utils/ErrorUtils.scala | 10 +++++---
.../service/FetchErrorDetailsHandlerSuite.scala | 25 +++++++++++++++++++
python/pyspark/sql/connect/proto/base_pb2.py | 14 +++++------
python/pyspark/sql/connect/proto/base_pb2.pyi | 13 +++++++++-
7 files changed, 81 insertions(+), 13 deletions(-)
diff --git
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index ec9b1698a4ee..55718ed9c0be 100644
---
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -45,6 +45,34 @@ import org.apache.spark.sql.types._
class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with
PrivateMethodTester {
+ test(s"throw SparkException with null filename in stack trace elements") {
+ withSQLConf("spark.sql.connect.enrichError.enabled" -> "true") {
+ val session = spark
+ import session.implicits._
+
+ val throwException =
+ udf((_: String) => {
+ val testError = new SparkException("test")
+ val stackTrace = testError.getStackTrace()
+ stackTrace(0) = new StackTraceElement(
+ stackTrace(0).getClassName,
+ stackTrace(0).getMethodName,
+ null,
+ stackTrace(0).getLineNumber)
+ testError.setStackTrace(stackTrace)
+ throw testError
+ })
+
+ val ex = intercept[SparkException] {
+ Seq("1").toDS.withColumn("udf_val", throwException($"value")).collect()
+ }
+
+ assert(ex.getCause.isInstanceOf[SparkException])
+ assert(ex.getCause.getStackTrace().length > 0)
+ assert(ex.getCause.getStackTrace()(0).getFileName == null)
+ }
+ }
+
for (enrichErrorEnabled <- Seq(false, true)) {
test(s"cause exception - ${enrichErrorEnabled}") {
withSQLConf("spark.sql.connect.enrichError.enabled" ->
enrichErrorEnabled.toString) {
diff --git
a/connector/connect/common/src/main/protobuf/spark/connect/base.proto
b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
index e5317cae6dc8..b30c578421c2 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
@@ -808,7 +808,7 @@ message FetchErrorDetailsResponse {
string method_name = 2;
// The name of the file containing the execution point.
- string file_name = 3;
+ optional string file_name = 3;
// The line number of the source line containing the execution point.
int32 line_number = 4;
diff --git
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
index edbc434ef964..2d86e8c1e417 100644
---
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
+++
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
@@ -222,7 +222,7 @@ private object GrpcExceptionConverter {
new StackTraceElement(
stackTraceElement.getDeclaringClass,
stackTraceElement.getMethodName,
- stackTraceElement.getFileName,
+ if (stackTraceElement.hasFileName) stackTraceElement.getFileName
else null,
stackTraceElement.getLineNumber)
})
}
diff --git
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
index 1abd44608cd0..78c1f723c902 100644
---
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
+++
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
@@ -92,13 +92,17 @@ private[connect] object ErrorUtils extends Logging {
builder.addAllStackTrace(
currentError.getStackTrace
.map { stackTraceElement =>
- FetchErrorDetailsResponse.StackTraceElement
+ val stackTraceBuilder =
FetchErrorDetailsResponse.StackTraceElement
.newBuilder()
.setDeclaringClass(stackTraceElement.getClassName)
.setMethodName(stackTraceElement.getMethodName)
- .setFileName(stackTraceElement.getFileName)
.setLineNumber(stackTraceElement.getLineNumber)
- .build()
+
+ if (stackTraceElement.getFileName != null) {
+ stackTraceBuilder.setFileName(stackTraceElement.getFileName)
+ }
+
+ stackTraceBuilder.build()
}
.toIterable
.asJava)
diff --git
a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
index c0591dcc9c7b..7633fa7df5d5 100644
---
a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
+++
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
@@ -163,4 +163,29 @@ class FetchErrorDetailsHandlerSuite extends
SharedSparkSession with ResourceHelp
testError = new Exception(s"test$i", testError)
}
}
+
+ test("null filename in stack trace elements") {
+ val testError = new Exception("test")
+ val stackTrace = testError.getStackTrace()
+ stackTrace(0) = new StackTraceElement(
+ stackTrace(0).getClassName,
+ stackTrace(0).getMethodName,
+ null,
+ stackTrace(0).getLineNumber)
+ testError.setStackTrace(stackTrace)
+
+ val errorId = UUID.randomUUID().toString()
+
+ SparkConnectService
+ .getOrCreateIsolatedSession(userId, sessionId)
+ .errorIdToError
+ .put(errorId, testError)
+
+ val response = fetchErrorDetails(userId, sessionId, errorId)
+ assert(response.hasRootErrorIdx)
+ assert(response.getRootErrorIdx == 0)
+
+ assert(response.getErrors(0).getStackTraceCount > 0)
+ assert(!response.getErrors(0).getStackTrace(0).hasFileName)
+ }
}
diff --git a/python/pyspark/sql/connect/proto/base_pb2.py
b/python/pyspark/sql/connect/proto/base_pb2.py
index 0634c9b35620..0cc8085763ce 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.py
+++ b/python/pyspark/sql/connect/proto/base_pb2.py
@@ -37,7 +37,7 @@ from pyspark.sql.connect.proto import types_pb2 as
spark_dot_connect_dot_types__
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-
b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01
\x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02
\x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17
[...]
+
b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01
\x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02
\x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17
[...]
)
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
@@ -200,11 +200,11 @@ if _descriptor._USE_C_DESCRIPTORS == False:
_FETCHERRORDETAILSREQUEST._serialized_start = 11301
_FETCHERRORDETAILSREQUEST._serialized_end = 11502
_FETCHERRORDETAILSRESPONSE._serialized_start = 11505
- _FETCHERRORDETAILSRESPONSE._serialized_end = 12051
+ _FETCHERRORDETAILSRESPONSE._serialized_end = 12070
_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 11650
- _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11805
- _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11808
- _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12032
- _SPARKCONNECTSERVICE._serialized_start = 12054
- _SPARKCONNECTSERVICE._serialized_end = 12903
+ _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11824
+ _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11827
+ _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12051
+ _SPARKCONNECTSERVICE._serialized_start = 12073
+ _SPARKCONNECTSERVICE._serialized_end = 12922
# @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi
b/python/pyspark/sql/connect/proto/base_pb2.pyi
index f154b199ce84..6320ec7bb56b 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/base_pb2.pyi
@@ -2816,12 +2816,20 @@ class
FetchErrorDetailsResponse(google.protobuf.message.Message):
*,
declaring_class: builtins.str = ...,
method_name: builtins.str = ...,
- file_name: builtins.str = ...,
+ file_name: builtins.str | None = ...,
line_number: builtins.int = ...,
) -> None: ...
+ def HasField(
+ self,
+ field_name: typing_extensions.Literal[
+ "_file_name", b"_file_name", "file_name", b"file_name"
+ ],
+ ) -> builtins.bool: ...
def ClearField(
self,
field_name: typing_extensions.Literal[
+ "_file_name",
+ b"_file_name",
"declaring_class",
b"declaring_class",
"file_name",
@@ -2832,6 +2840,9 @@ class
FetchErrorDetailsResponse(google.protobuf.message.Message):
b"method_name",
],
) -> None: ...
+ def WhichOneof(
+ self, oneof_group: typing_extensions.Literal["_file_name",
b"_file_name"]
+ ) -> typing_extensions.Literal["file_name"] | None: ...
class Error(google.protobuf.message.Message):
"""Error defines the schema for the representing exception."""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]