This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new a33b9fc60cae [SPARK-51691][CORE][TESTS] SerializationDebugger should swallow exceptions when trying to find the reason for a serialization problem
a33b9fc60cae is described below

commit a33b9fc60cae3c0cefe0d5027da3fcdefbf8c7b8
Author: summaryzb <summar...@gmail.com>
AuthorDate: Thu Apr 10 21:22:18 2025 +0800

[SPARK-51691][CORE][TESTS] SerializationDebugger should swallow exceptions when trying to find the reason for a serialization problem

### What changes were proposed in this pull request?

Catch `SparkRuntimeException` when digging into the serialization exception stack during unit tests.

### Why are the changes needed?

To present a clearer serialization exception stack hierarchy during tests. The `toString` implementation of `TreeNode` may throw an exception because of the `SQLConf.get` check, and swallowing that secondary exception makes it easier to debug the real problem.

### Does this PR introduce _any_ user-facing change?

Yes, but it only takes effect in unit tests. Users will see the direct serialization exception and the reference chain down to the root cause. Before this PR, users could be confused by an unrelated exception:

```
WARN org.apache.spark.serializer.SerializationDebugger: Exception in serialization debugger
org.apache.spark.SparkRuntimeException: Cannot get SQLConf inside scheduler event loop thread.
    at org.apache.spark.sql.errors.QueryExecutionErrors$.cannotGetSQLConfInSchedulerEventLoopThreadError(QueryExecutionErrors.scala:2002)
    at org.apache.spark.sql.internal.SQLConf$.get(SQLConf.scala:225)
    at org.apache.spark.sql.execution.ScalarSubquery.toString(subquery.scala:69)
    at java.lang.String.valueOf(String.java:2994)
    at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:203)
    at scala.collection.immutable.Stream.addString(Stream.scala:701)
    at scala.collection.TraversableOnce.mkString(TraversableOnce.scala:377)
org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.apache.spark.SimpleFutureAction
```

After this PR:

```
org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.apache.spark.SimpleFutureAction
Serialization stack:
    - object not serializable (class: org.apache.spark.SimpleFutureAction, value: org.apache.spark.SimpleFutureAction@4050649d)
    - writeObject data (class: java.util.concurrent.ConcurrentHashMap)
    - object (class java.util.concurrent.ConcurrentHashMap)
    ....(not shown)
```

### How was this patch tested?

Passes GitHub Actions.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50489 from summaryzb/SPARK-51691.
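The pattern at the heart of the change can be shown in isolation. Below is a minimal stand-alone sketch, not code from this commit: `DescribeDemo` and `describe` are made-up names, and a plain `RuntimeException` stands in for the `SparkRuntimeException` that the actual patch catches.

```scala
// Minimal sketch of the guarded-toString pattern this commit applies inside
// SerializationDebugger: the object's description is best-effort, so a
// failure in toString must not mask the real serialization error.
// DescribeDemo and describe are illustrative names, not Spark APIs.
object DescribeDemo {
  def describe(obj: AnyRef): String = {
    val str =
      try obj.toString
      catch {
        // Swallow the secondary failure and keep the diagnostic chain intact.
        case _: RuntimeException => "exception in toString"
      }
    s"object (class ${obj.getClass.getName}, $str)"
  }

  def main(args: Array[String]): Unit = {
    val touchy = new AnyRef {
      override def toString: String =
        throw new RuntimeException("Cannot get SQLConf inside scheduler event loop thread.")
    }
    println(describe(touchy)) // object (class ..., exception in toString)
  }
}
```

The commit scopes the same idea narrowly: it swallows only `SparkRuntimeException`, and takes the guarded path only when `Utils.isTesting` is true.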
Authored-by: summaryzb <summar...@gmail.com>
Signed-off-by: yangjie01 <yangji...@baidu.com>
---
 .../spark/serializer/SerializationDebugger.scala   | 11 +++++++++--
 .../serializer/SerializationDebuggerSuite.scala    | 20 +++++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
index b05babdce169..9687a0a31d01 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
@@ -26,8 +26,10 @@
 import scala.annotation.tailrec
 import scala.collection.mutable
 import scala.util.control.NonFatal
 
+import org.apache.spark.SparkRuntimeException
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.SparkClassUtils
+import org.apache.spark.util.Utils
 
 private[spark] object SerializationDebugger extends Logging {
@@ -110,8 +112,13 @@
         val elem = s"externalizable object (class ${e.getClass.getName}, $e)"
         visitExternalizable(e, elem :: stack)
 
-      case s: Object with java.io.Serializable =>
-        val elem = s"object (class ${s.getClass.getName}, $s)"
+      case s: Object with java.io.Serializable if Utils.isTesting =>
+        val str = try {
+          s.toString
+        } catch {
+          case _: SparkRuntimeException => "exception in toString"
+        }
+        val elem = s"object (class ${s.getClass.getName}, $str)"
         visitSerializable(s, elem :: stack)
 
       case _ =>
diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
index e903cf31d69f..f24502f99b27 100644
--- a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
@@ -21,7 +21,7 @@
 import java.io._
 
 import scala.annotation.meta.param
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkRuntimeException}
 
 class SerializationDebuggerSuite extends SparkFunSuite {
@@ -180,6 +180,15 @@
     assert(e.getMessage.contains("SerializableClass2")) // found debug trace should be present
   }
 
+  test("SPARK-51691 improveException swallow underlying exception") {
+    val e = SerializationDebugger.improveException(
+      new SerializableClassWithStringException(new NotSerializable),
+      new NotSerializableException("someClass"))
+    assert(e.getMessage.contains("exception in toString"))
+    assert(e.getMessage.contains("someClass"))
+    assert(e.getMessage.contains("SerializableClassWithStringException"))
+  }
+
   test("improveException with error in debugger") {
     // Object that throws exception in the SerializationDebugger
     val o = new SerializableClass1 {
@@ -205,6 +214,15 @@
 class SerializableClass1 extends Serializable
 
 class SerializableClass2(val objectField: Object) extends Serializable
 
+class SerializableClassWithStringException(val objectField: Object) extends Serializable {
+  override def toString: String = {
+    // simulate the behavior of TreeNode#toString that SQLConf.get may throw exception
+    throw new SparkRuntimeException(errorClass = "INTERNAL_ERROR",
+      messageParameters = Map("message" -> "this is an internal error"),
+      cause = null)
+  }
+}
+
 class SerializableArray(val arrayField: Array[Object]) extends Serializable
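For readers tracing the root cause above: `SQLConf.get` refuses to run on the scheduler event loop thread, which is why a plain `toString` call inside the serialization debugger could fail in the first place. The following stand-alone sketch illustrates that kind of thread-sensitive accessor; `ConfGuard`, the thread name, and `IllegalStateException` are illustrative assumptions, not Spark's actual implementation.

```scala
// Illustrative sketch (not Spark code) of a config accessor that inspects
// the calling thread and refuses to answer on a scheduler event loop.
object ConfGuard {
  private val eventLoopThreadName = "dag-scheduler-event-loop"

  def get: String =
    if (Thread.currentThread().getName == eventLoopThreadName) {
      // Mirrors the message seen in the WARN log from the PR description.
      throw new IllegalStateException(
        "Cannot get SQLConf inside scheduler event loop thread.")
    } else {
      "some-conf-value"
    }

  def main(args: Array[String]): Unit = {
    println(get) // succeeds on the main thread

    val t = new Thread(
      () => try get catch {
        case e: IllegalStateException => println(s"caught: ${e.getMessage}")
      },
      eventLoopThreadName)
    t.start()
    t.join() // prints: caught: Cannot get SQLConf inside scheduler event loop thread.
  }
}
```

Because the check keys off the calling thread, the same `toString` can succeed on a test's driver thread yet throw inside the scheduler's event loop, which is exactly the situation the patch guards against.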
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org