viirya commented on a change in pull request #29643:
URL: https://github.com/apache/spark/pull/29643#discussion_r483211691



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
##########
@@ -168,6 +170,85 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] 
extends TreeNode[PlanT
     }.toSeq
   }
 
+
+  /**
+   * Rewrites this plan tree based on the given plan mappings from old plan 
nodes to new nodes.
+   * This method also updates all the related references in this plan tree 
accordingly, in case
+   * the replaced node has different output expr ID than the old node.
+   */
+  def rewriteWithPlanMapping(
+      planMapping: Map[PlanType, PlanType],
+      canGetOutput: PlanType => Boolean = _ => true): PlanType = {
+    def internalRewrite(plan: PlanType): (PlanType, Seq[(Attribute, 
Attribute)]) = {
+      if (planMapping.contains(plan)) {
+        val newPlan = planMapping(plan)
+        val attrMapping = if (canGetOutput(plan) && canGetOutput(newPlan)) {
+          plan.output.zip(newPlan.output).filter {
+            case (a1, a2) => a1.exprId != a2.exprId
+          }
+        } else {
+          Nil
+        }
+        newPlan -> attrMapping
+      } else {
+        val attrMapping = new mutable.ArrayBuffer[(Attribute, Attribute)]()
+        val newPlan = plan.mapChildren { child =>
+          val (newChild, childAttrMapping) = internalRewrite(child)
+          attrMapping ++= childAttrMapping.filter { case (oldAttr, _) =>
+            // `attrMapping` is not only used to replace the attributes of the 
current `plan`,
+            // but also to be propagated to the parent plans of the current 
`plan`. Therefore,
+            // the `oldAttr` must be part of either `plan.references` (so that 
it can be used to
+            // replace attributes of the current `plan`) or `plan.outputSet` 
(so that it can be
+            // used by those parent plans).
+            (plan.outputSet ++ plan.references).contains(oldAttr)

Review comment:
       Oh, we don't check whether `plan` is resolved here, so `plan.outputSet` 
can cause an error.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to