This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d8aca18d98a3 [SPARK-49852][SQL] Fix deadlock caused by explain string
generation
d8aca18d98a3 is described below
commit d8aca18d98a3ffa711c1c2a571fe06962cf9f7d4
Author: Ziqi Liu <[email protected]>
AuthorDate: Tue Oct 8 08:24:14 2024 +0800
[SPARK-49852][SQL] Fix deadlock caused by explain string generation
### What changes were proposed in this pull request?
Replace `TreeNode.allChildren` with an identity map (it was previously a `Set`)
to avoid lazy evaluation of hash codes.
### Why are the changes needed?
We hit a deadlock between shuffle dependency initialization and explain
string generation, in which both code paths trigger lazy variable instantiation
while visiting some tree nodes in different orders and thus acquiring object
locks in a reversed order.
The hash code of a plan node is implemented as a lazy val. So the fix is to
remove hash code computation from explain string generation so as to break the
chain of lazy variable instantiation.
`TreeNode.allChildren` is only used in explain string generation and only
requires identity equality. This should also be a small performance improvement
BTW.
### Does this PR introduce _any_ user-facing change?
NO
### How was this patch tested?
Current UTs
### Was this patch authored or co-authored using generative AI tooling?
NO
Closes #48375 from liuzqt/SPARK-49852.
Authored-by: Ziqi Liu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/catalyst/trees/TreeNode.scala | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 6683f2dbfb39..4b8556b1bb5d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.trees
-import java.util.UUID
+import java.util.{IdentityHashMap, UUID}
import scala.annotation.nowarn
import scala.collection.{mutable, Map}
@@ -841,7 +841,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
*/
protected def stringArgs: Iterator[Any] = productIterator
- private lazy val allChildren: Set[TreeNode[_]] = (children ++
innerChildren).toSet[TreeNode[_]]
+ private lazy val allChildren: IdentityHashMap[TreeNode[_], Any] = {
+ val set = new IdentityHashMap[TreeNode[_], Any]()
+ (children ++ innerChildren).foreach {
+ set.put(_, null)
+ }
+ set
+ }
private def redactMapString[K, V](map: Map[K, V], maxFields: Int):
List[String] = {
// For security reason, redact the map value if the key is in certain
patterns
@@ -868,11 +874,11 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
/** Returns a string representing the arguments to this node, minus any
children */
def argString(maxFields: Int): String = stringArgs.flatMap {
- case tn: TreeNode[_] if allChildren.contains(tn) => Nil
- case Some(tn: TreeNode[_]) if allChildren.contains(tn) => Nil
+ case tn: TreeNode[_] if allChildren.containsKey(tn) => Nil
+ case Some(tn: TreeNode[_]) if allChildren.containsKey(tn) => Nil
case Some(tn: TreeNode[_]) => tn.simpleString(maxFields) :: Nil
case tn: TreeNode[_] => tn.simpleString(maxFields) :: Nil
- case seq: Seq[Any] if
seq.toSet.subsetOf(allChildren.asInstanceOf[Set[Any]]) => Nil
+ case seq: Seq[Any] if seq.forall(allChildren.containsKey) => Nil
case iter: Iterable[_] if iter.isEmpty => Nil
case array: Array[_] if array.isEmpty => Nil
case xs @ (_: Seq[_] | _: Set[_] | _: Array[_]) =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]