This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a33b9fc60cae [SPARK-51691][CORE][TESTS] SerializationDebugger should swallow exceptions when trying to find the cause of a serialization problem
a33b9fc60cae is described below

commit a33b9fc60cae3c0cefe0d5027da3fcdefbf8c7b8
Author: summaryzb <summar...@gmail.com>
AuthorDate: Thu Apr 10 21:22:18 2025 +0800

    [SPARK-51691][CORE][TESTS] SerializationDebugger should swallow exceptions when trying to find the cause of a serialization problem
    
    ### What changes were proposed in this pull request?
    Catch `SparkRuntimeException` when digging into the serialization exception stack during unit tests.
    
    ### Why are the changes needed?
    Present a clearer serialization exception stack hierarchy during tests. The `toString` implementation of `TreeNode` may throw an exception because of the `SQLConf.get` thread check, which buries the real problem; swallowing that exception makes the actual serialization failure easier to debug.
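
    As a minimal sketch of the failure mode (the class name here is hypothetical, modeled on the `SerializableClassWithStringException` test class added below): any `Serializable` object whose `toString` throws would previously crash the debugger itself, because the element description `s"object (class ${s.getClass.getName}, $s)"` invokes `toString`.
    ```scala
    import org.apache.spark.SparkRuntimeException

    // Hypothetical stand-in for a TreeNode whose toString hits the SQLConf.get
    // thread check and throws instead of rendering the node.
    class ThrowingToString extends Serializable {
      override def toString: String =
        throw new SparkRuntimeException(
          errorClass = "INTERNAL_ERROR",
          messageParameters = Map("message" -> "cannot get SQLConf on this thread"),
          cause = null)
    }
    ```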
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, but it only takes effect in unit tests.
    Users will see the direct serialization exception and the reference chain leading to the root cause.
    Before this PR, users could be confused by an unrelated exception being shown:
    ```
    WARN org.apache.spark.serializer.SerializationDebugger: Exception in serialization debugger
    org.apache.spark.SparkRuntimeException: Cannot get SQLConf inside scheduler event loop thread.
        at org.apache.spark.sql.errors.QueryExecutionErrors$.cannotGetSQLConfInSchedulerEventLoopThreadError(QueryExecutionErrors.scala:2002)
        at org.apache.spark.sql.internal.SQLConf$.get(SQLConf.scala:225)
        at org.apache.spark.sql.execution.ScalarSubquery.toString(subquery.scala:69)
        at java.lang.String.valueOf(String.java:2994)
        at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:203)
        at scala.collection.immutable.Stream.addString(Stream.scala:701)
        at scala.collection.TraversableOnce.mkString(TraversableOnce.scala:377)
    org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.apache.spark.SimpleFutureAction
    ```
    After this PR:
    ```
    org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.apache.spark.SimpleFutureAction
    Serialization stack:
            - object not serializable (class: org.apache.spark.SimpleFutureAction, value: org.apache.spark.SimpleFutureAction@4050649d)
            - writeObject data (class: java.util.concurrent.ConcurrentHashMap)
            - object (class java.util.concurrent.ConcurrentHashMap)
            ....(not shown)
    ```
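
    A sketch of how the improved message is produced, mirroring the new test case below (`SerializationDebugger` is `private[spark]`, so this runs from Spark's own test code; `SerializableClassWithStringException` and `NotSerializable` are the suite's helper classes):
    ```scala
    import java.io.NotSerializableException
    import org.apache.spark.serializer.SerializationDebugger

    val improved = SerializationDebugger.improveException(
      new SerializableClassWithStringException(new NotSerializable),
      new NotSerializableException("someClass"))

    // The throwing toString is replaced by a placeholder instead of aborting the
    // debugger, so the serialization stack still surfaces the root cause.
    assert(improved.getMessage.contains("exception in toString"))
    assert(improved.getMessage.contains("someClass"))
    ```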
    
    ### How was this patch tested?
    Pass GitHub Actions
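    Locally, the suite can also be run with `build/sbt "core/testOnly *SerializationDebuggerSuite"` (assuming the standard Spark build layout).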
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #50489 from summaryzb/SPARK-51691.
    
    Authored-by: summaryzb <summar...@gmail.com>
    Signed-off-by: yangjie01 <yangji...@baidu.com>
---
 .../spark/serializer/SerializationDebugger.scala     | 11 +++++++++--
 .../serializer/SerializationDebuggerSuite.scala      | 20 +++++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
index b05babdce169..9687a0a31d01 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
@@ -26,8 +26,10 @@ import scala.annotation.tailrec
 import scala.collection.mutable
 import scala.util.control.NonFatal
 
+import org.apache.spark.SparkRuntimeException
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.SparkClassUtils
+import org.apache.spark.util.Utils
 
 private[spark] object SerializationDebugger extends Logging {
 
@@ -110,8 +112,13 @@ private[spark] object SerializationDebugger extends Logging {
             val elem = s"externalizable object (class ${e.getClass.getName}, $e)"
             visitExternalizable(e, elem :: stack)
 
-          case s: Object with java.io.Serializable =>
-            val elem = s"object (class ${s.getClass.getName}, $s)"
+          case s: Object with java.io.Serializable if Utils.isTesting =>
+            val str = try {
+              s.toString
+            } catch {
+              case _: SparkRuntimeException => "exception in toString"
+            }
+            val elem = s"object (class ${s.getClass.getName}, $str)"
             visitSerializable(s, elem :: stack)
 
           case _ =>
diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
index e903cf31d69f..f24502f99b27 100644
--- a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
@@ -21,7 +21,7 @@ import java.io._
 
 import scala.annotation.meta.param
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkRuntimeException}
 
 class SerializationDebuggerSuite extends SparkFunSuite {
 
@@ -180,6 +180,15 @@ class SerializationDebuggerSuite extends SparkFunSuite {
     assert(e.getMessage.contains("SerializableClass2"))  // found debug trace should be present
   }
 
+  test("SPARK-51691 improveException swallow underlying exception") {
+    val e = SerializationDebugger.improveException(
+      new SerializableClassWithStringException(new NotSerializable),
+      new NotSerializableException("someClass"))
+    assert(e.getMessage.contains("exception in toString"))
+    assert(e.getMessage.contains("someClass"))
+    assert(e.getMessage.contains("SerializableClassWithStringException"))
+  }
+
   test("improveException with error in debugger") {
     // Object that throws exception in the SerializationDebugger
     val o = new SerializableClass1 {
@@ -205,6 +214,15 @@ class SerializableClass1 extends Serializable
 
 class SerializableClass2(val objectField: Object) extends Serializable
 
+class SerializableClassWithStringException(val objectField: Object) extends Serializable {
+  override def toString: String = {
+    // simulate the behavior of TreeNode#toString, where SQLConf.get may throw an exception
+    throw new SparkRuntimeException(errorClass = "INTERNAL_ERROR",
+      messageParameters = Map("message" -> "this is an internal error"),
+      cause = null)
+  }
+}
+
 
 class SerializableArray(val arrayField: Array[Object]) extends Serializable
 

