This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 372c3d15c02b [SPARK-51268][SQL] Make `TreeNode` lock-free
372c3d15c02b is described below

commit 372c3d15c02b0261a3d5ca9e1aae9e4ecd022da9
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Wed Jul 2 12:45:48 2025 +0800

    [SPARK-51268][SQL] Make `TreeNode` lock-free
    
    ### What changes were proposed in this pull request?
    Make `TreeNode` lock-free, by replace `lazy val` with `BestEffortLazyVal`
    
    ### Why are the changes needed?
    In several deadlock issues, we observed that the lock of 
`TreeNode._hashCode` is the root cause.
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #50019 from zhengruifeng/sql_tree_node.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/catalyst/plans/QueryPlan.scala |  5 +++--
 .../apache/spark/sql/catalyst/trees/TreeNode.scala  | 21 +++++++++++++--------
 .../spark/sql/catalyst/trees/TreePatternBits.scala  |  2 +-
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 3f3a60e19641..7801cd347f7d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -97,7 +97,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
   def outputOrdering: Seq[SortOrder] = Nil
 
   // Override `treePatternBits` to propagate bits for its expressions.
-  override lazy val treePatternBits: BitSet = {
+  private val _treePatternBits = new BestEffortLazyVal[BitSet](() => {
     val bits: BitSet = getDefaultTreePatternBits
     // Propagate expressions' pattern bits
     val exprIterator = expressions.iterator
@@ -105,7 +105,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
       bits.union(exprIterator.next().treePatternBits)
     }
     bits
-  }
+  })
+  override def treePatternBits: BitSet = _treePatternBits()
 
   /**
    * The set of all attributes that are input to this operator by its children.
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 0ada02e6d29b..bbef465d9cc1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -49,8 +49,8 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.{BestEffortLazyVal, Utils}
 import org.apache.spark.util.ArrayImplicits._
-import org.apache.spark.util.Utils
 import org.apache.spark.util.collection.BitSet
 
 /** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given 
number */
@@ -118,7 +118,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    * A BitSet of tree patterns for this TreeNode and its subtree. If this 
TreeNode and its
    * subtree contains a pattern `P`, the corresponding bit for `P.id` is set 
in this BitSet.
    */
-  override lazy val treePatternBits: BitSet = getDefaultTreePatternBits
+  private val _treePatternBits = new BestEffortLazyVal[BitSet](() => 
getDefaultTreePatternBits)
+  override def treePatternBits: BitSet = _treePatternBits()
 
   /**
    * A BitSet of rule ids to record ineffective rules for this TreeNode and 
its subtree.
@@ -204,12 +205,15 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    */
   def children: Seq[BaseType]
 
-  lazy val containsChild: Set[TreeNode[_]] = children.toSet
+  private val _containsChild = new BestEffortLazyVal[Set[TreeNode[_]]](() => 
children.toSet)
+  def containsChild: Set[TreeNode[_]] = _containsChild()
 
-  lazy val height: Int = children.map(_.height).reduceOption(_ max 
_).getOrElse(0) + 1
+  private val _height = new BestEffortLazyVal[Integer](() =>
+    children.map(_.height).reduceOption(_ max _).getOrElse(0) + 1)
+  def height: Int = _height()
 
-  private lazy val _hashCode: Int = MurmurHash3.productHash(this)
-  override def hashCode(): Int = _hashCode
+  private val _hashCode = new BestEffortLazyVal[Integer](() => 
MurmurHash3.productHash(this))
+  override def hashCode(): Int = _hashCode()
 
   /**
    * Faster version of equality which short-circuits when two treeNodes are 
the same instance.
@@ -847,13 +851,14 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    */
   protected def stringArgs: Iterator[Any] = productIterator
 
-  private lazy val allChildren: IdentityHashMap[TreeNode[_], Any] = {
+  private val _allChildren = new 
BestEffortLazyVal[IdentityHashMap[TreeNode[_], Any]](() => {
     val set = new IdentityHashMap[TreeNode[_], Any]()
     (children ++ innerChildren).foreach {
       set.put(_, null)
     }
     set
-  }
+  })
+  private def allChildren = _allChildren()
 
   private def redactMapString[K, V](map: Map[K, V], maxFields: Int): 
List[String] = {
     // For security reason, redact the map value if the key is in certain 
patterns
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
index 7b0c99b35203..963b6d27fc79 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
@@ -22,7 +22,7 @@ import org.apache.spark.util.collection.BitSet
 
 // A wrapper of BitSet for pattern enums.
 trait TreePatternBits {
-  protected val treePatternBits: BitSet
+  protected def treePatternBits: BitSet
 
   /**
    * @param t, the tree pattern enum to be tested.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to