This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 372c3d15c02b [SPARK-51268][SQL] Make `TreeNode` lock-free
372c3d15c02b is described below
commit 372c3d15c02b0261a3d5ca9e1aae9e4ecd022da9
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Wed Jul 2 12:45:48 2025 +0800
[SPARK-51268][SQL] Make `TreeNode` lock-free
### What changes were proposed in this pull request?
Make `TreeNode` lock-free by replacing `lazy val` with `BestEffortLazyVal`.
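For readers unfamiliar with the helper, below is a minimal sketch of the idea behind such a wrapper; the name `BestEffortLazyValSketch` and its internals are illustrative assumptions, and the real `org.apache.spark.util.BestEffortLazyVal` may differ in detail:

```scala
import java.util.concurrent.atomic.AtomicReference

// Illustrative sketch only, not Spark's implementation: a lock-free,
// "best effort" lazy value. No monitor is ever held, so it cannot take part
// in a deadlock; at worst, racing threads each run `compute` once and the
// first published result wins.
class BestEffortLazyValSketch[T <: AnyRef](compute: () => T) {
  private val cached = new AtomicReference[T]()

  def apply(): T = {
    val existing = cached.get()
    if (existing != null) {
      existing
    } else {
      // Compute outside of any lock; duplicate results from races are discarded.
      cached.compareAndSet(null.asInstanceOf[T], compute())
      cached.get()
    }
  }
}

// Usage mirroring the pattern in this patch:
//   private val _hashCode = new BestEffortLazyValSketch[Integer](() => MurmurHash3.productHash(this))
//   override def hashCode(): Int = _hashCode()
```

The trade-off is that two racing threads may both run the initializer, which is presumably acceptable here because the cached computations (hash codes, pattern bits, child sets) are deterministic and side-effect free; it would also explain why the patch stores boxed `Integer` rather than `Int`, since the cached slot needs a nullable reference type.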
### Why are the changes needed?
In several deadlock issues, we observed that the monitor lock taken while initializing the
`lazy val` `TreeNode._hashCode` is the root cause.
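For context, an illustrative (non-Spark) sketch of the failure mode: initializing a Scala `lazy val` holds the enclosing instance's monitor while the initializer runs, so it can form a lock-ordering deadlock with any other lock acquired around the same object:

```scala
// Illustrative only: two threads acquiring the same pair of locks in opposite order.
object LazyValDeadlockDemo {
  private val external = new Object

  class Node {
    // Initialization synchronizes on this Node, then also wants `external`.
    lazy val cachedHash: Int = { Thread.sleep(50); external.synchronized(42) }
  }

  def main(args: Array[String]): Unit = {
    val n = new Node
    val t1 = new Thread(() => { n.cachedHash; () })                        // locks n, then external
    val t2 = new Thread(() => { external.synchronized(n.cachedHash); () }) // locks external, then n
    t1.start(); t2.start()
    t1.join(); t2.join() // may never return: classic lock-ordering deadlock
  }
}
```

Replacing the `lazy val` with a lock-free cached accessor removes the instance monitor from the picture entirely.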
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #50019 from zhengruifeng/sql_tree_node.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/plans/QueryPlan.scala | 5 +++--
.../apache/spark/sql/catalyst/trees/TreeNode.scala | 21 +++++++++++++--------
.../spark/sql/catalyst/trees/TreePatternBits.scala | 2 +-
3 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 3f3a60e19641..7801cd347f7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -97,7 +97,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
   def outputOrdering: Seq[SortOrder] = Nil
 
   // Override `treePatternBits` to propagate bits for its expressions.
-  override lazy val treePatternBits: BitSet = {
+  private val _treePatternBits = new BestEffortLazyVal[BitSet](() => {
     val bits: BitSet = getDefaultTreePatternBits
     // Propagate expressions' pattern bits
     val exprIterator = expressions.iterator
@@ -105,7 +105,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
       bits.union(exprIterator.next().treePatternBits)
     }
     bits
-  }
+  })
+  override def treePatternBits: BitSet = _treePatternBits()
 
   /**
    * The set of all attributes that are input to this operator by its children.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 0ada02e6d29b..bbef465d9cc1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -49,8 +49,8 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.{BestEffortLazyVal, Utils}
 import org.apache.spark.util.ArrayImplicits._
-import org.apache.spark.util.Utils
 import org.apache.spark.util.collection.BitSet
 
 /** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given number */
@@ -118,7 +118,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    * A BitSet of tree patterns for this TreeNode and its subtree. If this TreeNode and its
    * subtree contains a pattern `P`, the corresponding bit for `P.id` is set in this BitSet.
    */
-  override lazy val treePatternBits: BitSet = getDefaultTreePatternBits
+  private val _treePatternBits = new BestEffortLazyVal[BitSet](() => getDefaultTreePatternBits)
+  override def treePatternBits: BitSet = _treePatternBits()
 
   /**
    * A BitSet of rule ids to record ineffective rules for this TreeNode and its subtree.
@@ -204,12 +205,15 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    */
   def children: Seq[BaseType]
 
-  lazy val containsChild: Set[TreeNode[_]] = children.toSet
+  private val _containsChild = new BestEffortLazyVal[Set[TreeNode[_]]](() => children.toSet)
+  def containsChild: Set[TreeNode[_]] = _containsChild()
 
-  lazy val height: Int = children.map(_.height).reduceOption(_ max _).getOrElse(0) + 1
+  private val _height = new BestEffortLazyVal[Integer](() =>
+    children.map(_.height).reduceOption(_ max _).getOrElse(0) + 1)
+  def height: Int = _height()
 
-  private lazy val _hashCode: Int = MurmurHash3.productHash(this)
-  override def hashCode(): Int = _hashCode
+  private val _hashCode = new BestEffortLazyVal[Integer](() => MurmurHash3.productHash(this))
+  override def hashCode(): Int = _hashCode()
 
   /**
    * Faster version of equality which short-circuits when two treeNodes are the same instance.
@@ -847,13 +851,14 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
    */
   protected def stringArgs: Iterator[Any] = productIterator
 
-  private lazy val allChildren: IdentityHashMap[TreeNode[_], Any] = {
+  private val _allChildren = new BestEffortLazyVal[IdentityHashMap[TreeNode[_], Any]](() => {
     val set = new IdentityHashMap[TreeNode[_], Any]()
     (children ++ innerChildren).foreach {
       set.put(_, null)
     }
     set
-  }
+  })
+  private def allChildren = _allChildren()
 
   private def redactMapString[K, V](map: Map[K, V], maxFields: Int): List[String] = {
     // For security reason, redact the map value if the key is in certain patterns
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
index 7b0c99b35203..963b6d27fc79 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatternBits.scala
@@ -22,7 +22,7 @@ import org.apache.spark.util.collection.BitSet
 // A wrapper of BitSet for pattern enums.
 trait TreePatternBits {
 
-  protected val treePatternBits: BitSet
+  protected def treePatternBits: BitSet
 
   /**
    * @param t, the tree pattern enum to be tested.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]