cloud-fan commented on code in PR #46034:
URL: https://github.com/apache/spark/pull/46034#discussion_r1568259745


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala:
##########
@@ -21,36 +21,68 @@ import scala.collection.mutable
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, PlanHelper, 
Project}
+import org.apache.spark.sql.catalyst.planning.PhysicalAggregation
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, 
PlanHelper, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMON_EXPR_REF, 
WITH_EXPRESSION}
 
 /**
  * Rewrites the `With` expressions by adding a `Project` to pre-evaluate the common expressions, or
  * just inline them if they are cheap.
  *
+ * Since this rule can introduce new `Project` operators, it is advised to run [[CollapseProject]]
+ * after this rule.
+ *
  * Note: For now we only use `With` in a few `RuntimeReplaceable` expressions. If we expand its
  *       usage, we should support aggregate/window functions as well.
  */
 object RewriteWithExpression extends Rule[LogicalPlan] {
   override def apply(plan: LogicalPlan): LogicalPlan = {
-    
plan.transformDownWithSubqueriesAndPruning(_.containsPattern(WITH_EXPRESSION)) {
+    
plan.transformUpWithSubqueriesAndPruning(_.containsPattern(WITH_EXPRESSION)) {
+      // For aggregates, separate the computation of the aggregations themselves from the final
+      // result by moving the final result computation into a projection above it. This prevents
+      // this rule from producing an invalid Aggregate operator.
+      case p @ PhysicalAggregation(
+          groupingExpressions, aggregateExpressions, resultExpressions, child)
+          if p.expressions.exists(_.containsPattern(WITH_EXPRESSION)) =>
+        // There should not be dangling common expression references in the aggregate expressions.
+        // This can happen if a With is created with an aggregate function in its child.
+        assert(!aggregateExpressions.exists(ae =>

Review Comment:
   Shall we move this assertion into the constructor of `With`, so that it fails earlier?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to