This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 02206cd66dbf [SPARK-48047][SQL] Reduce memory pressure of empty TreeNode tags 02206cd66dbf is described below commit 02206cd66dbfc8de602a685b032f1805bcf8e36f Author: Nick Young <nick.yo...@databricks.com> AuthorDate: Tue Apr 30 22:07:20 2024 -0700 [SPARK-48047][SQL] Reduce memory pressure of empty TreeNode tags ### What changes were proposed in this pull request? - Changed the `tags` variable of the `TreeNode` class to initialize lazily. This will reduce unnecessary driver memory pressure. ### Why are the changes needed? - Plans with large expression or operator trees are known to cause driver memory pressure; this is one step in alleviating that issue. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UT covers behavior. Outwards facing behavior does not change. ### Was this patch authored or co-authored using generative AI tooling? No Closes #46285 from n-young-db/treenode-tags. Authored-by: Nick Young <nick.yo...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../apache/spark/sql/catalyst/trees/TreeNode.scala | 24 ++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 94e893d468b3..dd39f3182bfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -78,8 +78,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] /** * A mutable map for holding auxiliary information of this tree node. It will be carried over * when this node is copied via `makeCopy`, or transformed via `transformUp`/`transformDown`. + * We lazily evaluate the `tags` since the default size of a `mutable.Map` is nonzero. This + * will reduce unnecessary memory pressure. */ - private val tags: mutable.Map[TreeNodeTag[_], Any] = mutable.Map.empty + private[this] var _tags: mutable.Map[TreeNodeTag[_], Any] = null + private def tags: mutable.Map[TreeNodeTag[_], Any] = { + if (_tags eq null) { + _tags = mutable.Map.empty + } + _tags + } /** * Default tree pattern [[BitSet] for a [[TreeNode]]. @@ -147,11 +155,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] ineffectiveRules.get(ruleId.id) } + def isTagsEmpty: Boolean = (_tags eq null) || _tags.isEmpty + def copyTagsFrom(other: BaseType): Unit = { // SPARK-32753: it only makes sense to copy tags to a new node // but it's too expensive to detect other cases likes node removal // so we make a compromise here to copy tags to node with no tags - if (tags.isEmpty) { + if (isTagsEmpty && !other.isTagsEmpty) { tags ++= other.tags } } @@ -161,11 +171,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] } def getTagValue[T](tag: TreeNodeTag[T]): Option[T] = { - tags.get(tag).map(_.asInstanceOf[T]) + if (isTagsEmpty) { + None + } else { + tags.get(tag).map(_.asInstanceOf[T]) + } } def unsetTagValue[T](tag: TreeNodeTag[T]): Unit = { - tags -= tag + if (!isTagsEmpty) { + tags -= tag + } } /** --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org