This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 02206cd66dbf [SPARK-48047][SQL] Reduce memory pressure of empty 
TreeNode tags
02206cd66dbf is described below

commit 02206cd66dbfc8de602a685b032f1805bcf8e36f
Author: Nick Young <nick.yo...@databricks.com>
AuthorDate: Tue Apr 30 22:07:20 2024 -0700

    [SPARK-48047][SQL] Reduce memory pressure of empty TreeNode tags
    
    ### What changes were proposed in this pull request?
    
    - Changed the `tags` variable of the `TreeNode` class to initialize lazily. 
This will reduce unnecessary driver memory pressure.
    
    ### Why are the changes needed?
    
    - Plans with large expression or operator trees are known to cause driver 
memory pressure; this is one step in alleviating that issue.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing UT covers behavior. Outwards facing behavior does not change.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #46285 from n-young-db/treenode-tags.
    
    Authored-by: Nick Young <nick.yo...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/sql/catalyst/trees/TreeNode.scala | 24 ++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 94e893d468b3..dd39f3182bfb 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -78,8 +78,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
   /**
    * A mutable map for holding auxiliary information of this tree node. It 
will be carried over
    * when this node is copied via `makeCopy`, or transformed via 
`transformUp`/`transformDown`.
+   * We lazily evaluate the `tags` since the default size of a `mutable.Map` 
is nonzero. This
+   * will reduce unnecessary memory pressure.
    */
-  private val tags: mutable.Map[TreeNodeTag[_], Any] = mutable.Map.empty
+  private[this] var _tags: mutable.Map[TreeNodeTag[_], Any] = null
+  private def tags: mutable.Map[TreeNodeTag[_], Any] = {
+    if (_tags eq null) {
+      _tags = mutable.Map.empty
+    }
+    _tags
+  }
 
   /**
    * Default tree pattern [[BitSet] for a [[TreeNode]].
@@ -147,11 +155,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
     ineffectiveRules.get(ruleId.id)
   }
 
+  def isTagsEmpty: Boolean = (_tags eq null) || _tags.isEmpty
+
   def copyTagsFrom(other: BaseType): Unit = {
     // SPARK-32753: it only makes sense to copy tags to a new node
     // but it's too expensive to detect other cases likes node removal
     // so we make a compromise here to copy tags to node with no tags
-    if (tags.isEmpty) {
+    if (isTagsEmpty && !other.isTagsEmpty) {
       tags ++= other.tags
     }
   }
@@ -161,11 +171,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
   }
 
   def getTagValue[T](tag: TreeNodeTag[T]): Option[T] = {
-    tags.get(tag).map(_.asInstanceOf[T])
+    if (isTagsEmpty) {
+      None
+    } else {
+      tags.get(tag).map(_.asInstanceOf[T])
+    }
   }
 
   def unsetTagValue[T](tag: TreeNodeTag[T]): Unit = {
-    tags -= tag
+    if (!isTagsEmpty) {
+      tags -= tag
+    }
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to