cloud-fan commented on code in PR #49202:
URL: https://github.com/apache/spark/pull/49202#discussion_r1906679195


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala:
##########
@@ -1978,6 +2018,70 @@ object PushPredicateThroughNonJoin extends 
Rule[LogicalPlan] with PredicateHelpe
       case _ => false
     }
   }
+
+  private def removeOriginAlias(expr: Expression): Expression = {
+    expr.transform {
+      case ced: CommonExpressionDef => ced.copy(originAlias = None)
+    }
+  }
+
+  private def rewriteCondition(
+      cond: Expression,
+      aliasMap: AttributeMap[Alias]): Expression = {
+    replaceAlias(rewriteConditionByWith(cond, aliasMap), aliasMap)
+  }
+
+  /**
+   * Use [[With]] to rewrite condition which contains attribute that are not 
cheap.
+   */
+  private def rewriteConditionByWith(
+      cond: Expression,
+      aliasMap: AttributeMap[Alias]): Expression = {
+    if (!SQLConf.get.getConf(SQLConf.ALWAYS_INLINE_COMMON_EXPR)) {
+      val replaceWithMap = cond.collect {case a: Attribute => a }
+        .distinct
+        .filter(attr => aliasMap.contains(attr))
+        .map(attr => attr -> aliasMap(attr))
+        .filter(m => !CollapseProject.isCheap(m._2))
+      if (replaceWithMap.isEmpty) {
+        cond
+      } else {
+        val defsMap = AttributeMap(replaceWithMap.map(m =>
+          m._1 -> CommonExpressionDef(child = trimAliases(m._2), originAlias = 
Some(m._2))))
+        val refsMap = AttributeMap(defsMap.map(m => m._1 -> new 
CommonExpressionRef(m._2)))
+        splitConjunctivePredicates(cond)
+          .map(rewriteByWith(_, defsMap, refsMap))

Review Comment:
   Shall we wrap the entire condition in a single `With`? That makes sure we 
don't duplicate a huge expression tree in the query plan.
   
   When pushing predicates down through Aggregate, we can temporarily strip 
the `With` while splitting the condition. This won't cause any duplication, 
because the split depends on the references, so the `pushDown` and `stayUp` 
parts won't reference the same common expressions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to