This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8230f16164b1 [SPARK-45317][SQL][CONNECT] Handle null filename in stack traces of exceptions 8230f16164b1 is described below commit 8230f16164b1cbd20ca0cb052c28c9fdb8d892d1 Author: Yihong He <yihong...@databricks.com> AuthorDate: Tue Sep 26 11:04:14 2023 +0900 [SPARK-45317][SQL][CONNECT] Handle null filename in stack traces of exceptions ### What changes were proposed in this pull request? - Handle null filename in stack traces of exceptions - Change the filename field in protobuf to optional ### Why are the changes needed? - In Java exceptions, the filename is the only stack-trace field that can be null, and a null filename may cause a NullPointerException ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - `build/sbt "connect-client-jvm/testOnly *ClientE2ETestSuite"` - `build/sbt "connect-client-jvm/testOnly *ClientStreamingQuerySuite"` ### Was this patch authored or co-authored using generative AI tooling? Closes #43103 from heyihong/SPARK-45317. 
Authored-by: Yihong He <yihong...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../org/apache/spark/sql/ClientE2ETestSuite.scala | 28 ++++++++++++++++++++++ .../src/main/protobuf/spark/connect/base.proto | 2 +- .../connect/client/GrpcExceptionConverter.scala | 2 +- .../spark/sql/connect/utils/ErrorUtils.scala | 10 +++++--- .../service/FetchErrorDetailsHandlerSuite.scala | 25 +++++++++++++++++++ python/pyspark/sql/connect/proto/base_pb2.py | 14 +++++------ python/pyspark/sql/connect/proto/base_pb2.pyi | 13 +++++++++- 7 files changed, 81 insertions(+), 13 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index ec9b1698a4ee..55718ed9c0be 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -45,6 +45,34 @@ import org.apache.spark.sql.types._ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateMethodTester { + test(s"throw SparkException with null filename in stack trace elements") { + withSQLConf("spark.sql.connect.enrichError.enabled" -> "true") { + val session = spark + import session.implicits._ + + val throwException = + udf((_: String) => { + val testError = new SparkException("test") + val stackTrace = testError.getStackTrace() + stackTrace(0) = new StackTraceElement( + stackTrace(0).getClassName, + stackTrace(0).getMethodName, + null, + stackTrace(0).getLineNumber) + testError.setStackTrace(stackTrace) + throw testError + }) + + val ex = intercept[SparkException] { + Seq("1").toDS.withColumn("udf_val", throwException($"value")).collect() + } + + assert(ex.getCause.isInstanceOf[SparkException]) + assert(ex.getCause.getStackTrace().length > 0) + assert(ex.getCause.getStackTrace()(0).getFileName == null) + } + } 
+ for (enrichErrorEnabled <- Seq(false, true)) { test(s"cause exception - ${enrichErrorEnabled}") { withSQLConf("spark.sql.connect.enrichError.enabled" -> enrichErrorEnabled.toString) { diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index e5317cae6dc8..b30c578421c2 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -808,7 +808,7 @@ message FetchErrorDetailsResponse { string method_name = 2; // The name of the file containing the execution point. - string file_name = 3; + optional string file_name = 3; // The line number of the source line containing the execution point. int32 line_number = 4; diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala index edbc434ef964..2d86e8c1e417 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala @@ -222,7 +222,7 @@ private object GrpcExceptionConverter { new StackTraceElement( stackTraceElement.getDeclaringClass, stackTraceElement.getMethodName, - stackTraceElement.getFileName, + if (stackTraceElement.hasFileName) stackTraceElement.getFileName else null, stackTraceElement.getLineNumber) }) } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala index 1abd44608cd0..78c1f723c902 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala +++ 
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala @@ -92,13 +92,17 @@ private[connect] object ErrorUtils extends Logging { builder.addAllStackTrace( currentError.getStackTrace .map { stackTraceElement => - FetchErrorDetailsResponse.StackTraceElement + val stackTraceBuilder = FetchErrorDetailsResponse.StackTraceElement .newBuilder() .setDeclaringClass(stackTraceElement.getClassName) .setMethodName(stackTraceElement.getMethodName) - .setFileName(stackTraceElement.getFileName) .setLineNumber(stackTraceElement.getLineNumber) - .build() + + if (stackTraceElement.getFileName != null) { + stackTraceBuilder.setFileName(stackTraceElement.getFileName) + } + + stackTraceBuilder.build() } .toIterable .asJava) diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala index c0591dcc9c7b..7633fa7df5d5 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala @@ -163,4 +163,29 @@ class FetchErrorDetailsHandlerSuite extends SharedSparkSession with ResourceHelp testError = new Exception(s"test$i", testError) } } + + test("null filename in stack trace elements") { + val testError = new Exception("test") + val stackTrace = testError.getStackTrace() + stackTrace(0) = new StackTraceElement( + stackTrace(0).getClassName, + stackTrace(0).getMethodName, + null, + stackTrace(0).getLineNumber) + testError.setStackTrace(stackTrace) + + val errorId = UUID.randomUUID().toString() + + SparkConnectService + .getOrCreateIsolatedSession(userId, sessionId) + .errorIdToError + .put(errorId, testError) + + val response = fetchErrorDetails(userId, sessionId, errorId) + 
assert(response.hasRootErrorIdx) + assert(response.getRootErrorIdx == 0) + + assert(response.getErrors(0).getStackTraceCount > 0) + assert(!response.getErrors(0).getStackTrace(0).hasFileName) + } } diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 0634c9b35620..0cc8085763ce 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17 [...] + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17 [...] 
) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -200,11 +200,11 @@ if _descriptor._USE_C_DESCRIPTORS == False: _FETCHERRORDETAILSREQUEST._serialized_start = 11301 _FETCHERRORDETAILSREQUEST._serialized_end = 11502 _FETCHERRORDETAILSRESPONSE._serialized_start = 11505 - _FETCHERRORDETAILSRESPONSE._serialized_end = 12051 + _FETCHERRORDETAILSRESPONSE._serialized_end = 12070 _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 11650 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11805 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11808 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12032 - _SPARKCONNECTSERVICE._serialized_start = 12054 - _SPARKCONNECTSERVICE._serialized_end = 12903 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11824 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11827 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12051 + _SPARKCONNECTSERVICE._serialized_start = 12073 + _SPARKCONNECTSERVICE._serialized_end = 12922 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index f154b199ce84..6320ec7bb56b 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -2816,12 +2816,20 @@ class FetchErrorDetailsResponse(google.protobuf.message.Message): *, declaring_class: builtins.str = ..., method_name: builtins.str = ..., - file_name: builtins.str = ..., + file_name: builtins.str | None = ..., line_number: builtins.int = ..., ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "_file_name", b"_file_name", "file_name", b"file_name" + ], + ) -> builtins.bool: ... 
def ClearField( self, field_name: typing_extensions.Literal[ + "_file_name", + b"_file_name", "declaring_class", b"declaring_class", "file_name", @@ -2832,6 +2840,9 @@ class FetchErrorDetailsResponse(google.protobuf.message.Message): b"method_name", ], ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_file_name", b"_file_name"] + ) -> typing_extensions.Literal["file_name"] | None: ... class Error(google.protobuf.message.Message): """Error defines the schema for the representing exception.""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org