cloud-fan commented on a change in pull request #25111: [SPARK-28346][SQL] 
clone the query plan between analyzer, optimizer and planner
URL: https://github.com/apache/spark/pull/25111#discussion_r302386462
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/LogicalPlanStats.scala
 ##########
 @@ -18,33 +18,38 @@
 package org.apache.spark.sql.catalyst.plans.logical.statsEstimation
 
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 
 /**
  * A trait to add statistics propagation to [[LogicalPlan]].
  */
 trait LogicalPlanStats { self: LogicalPlan =>
+  import LogicalPlanStats.STATS_CACHE_TAG
 
   /**
    * Returns the estimated statistics for the current logical plan node. Under 
the hood, this
    * method caches the return value, which is computed based on the 
configuration passed in the
    * first time. If the configuration changes, the cache can be invalidated by 
calling
    * [[invalidateStatsCache()]].
    */
-  def stats: Statistics = statsCache.getOrElse {
+  def stats: Statistics = statsOpt.getOrElse {
     if (conf.cboEnabled) {
-      statsCache = Option(BasicStatsPlanVisitor.visit(self))
+      setTagValue(STATS_CACHE_TAG, BasicStatsPlanVisitor.visit(self))
     } else {
-      statsCache = Option(SizeInBytesOnlyStatsPlanVisitor.visit(self))
+      setTagValue(STATS_CACHE_TAG, SizeInBytesOnlyStatsPlanVisitor.visit(self))
     }
-    statsCache.get
+    statsOpt.get
   }
 
-  /** A cache for the estimated statistics, such that it will only be computed 
once. */
-  protected var statsCache: Option[Statistics] = None
 
 Review comment:
   It's fragile to use a member variable to keep stats, as they will be lost 
after a copy.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to