peter-toth commented on a change in pull request #31913:
URL: https://github.com/apache/spark/pull/31913#discussion_r612688038



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##########
@@ -793,6 +817,56 @@ case class Aggregate(
     copy(child = newChild)
 }
 
+object Aggregate {
+  private def collectComplexGroupingExpressions(groupingExpressions: 
Seq[Expression]) = {
+    groupingExpressions.zipWithIndex
+      .foldLeft(mutable.Map.empty[Expression, (Expression, Int)]) {
+        case (m, (ge, i)) =>
+          if (!ge.foldable && ge.children.nonEmpty && 
!m.contains(ge.canonicalized)) {
+            m += ge.canonicalized -> (ge, i)
+          }
+          m
+      }
+  }
+
+  private def insertGroupingReferences(
+      aggregateExpressions: Seq[NamedExpression],
+      groupingExpressions: collection.Map[Expression, (Expression, Int)]): 
Seq[NamedExpression] = {
+    def insertGroupingExprRefs(e: Expression): Expression = {
+      e match {
+        case _ if !e.deterministic => e
+        case _: AggregateExpression => e
+        case _ if PythonUDF.isGroupedAggPandasUDF(e) => e
+        case _ if groupingExpressions.contains(e.canonicalized) =>
+          val (groupingExpression, ordinal) = 
groupingExpressions(e.canonicalized)
+          GroupingExprRef(ordinal, groupingExpression.dataType, 
groupingExpression.nullable)
+        case _ => e.mapChildren(insertGroupingExprRefs)
+      }
+    }
+
+    
aggregateExpressions.map(insertGroupingExprRefs(_).asInstanceOf[NamedExpression])
+  }
+
+  def withGroupingRefs(
+      groupingExpressions: Seq[Expression],
+      aggregateExpressions: Seq[NamedExpression],
+      child: LogicalPlan): Aggregate = {
+    val dealiasedGroupingExpressions = groupingExpressions.map {
+      case a: Alias => a.child
+      case o => o
+    }
+    val complexGroupingExpressions =
+      collectComplexGroupingExpressions(dealiasedGroupingExpressions)
+    val aggrExprWithGroupingReferences = if 
(complexGroupingExpressions.nonEmpty) {
+      insertGroupingReferences(aggregateExpressions, 
complexGroupingExpressions)

Review comment:
       I also see the pros of making this framework more general, and I'm happy 
to do it, but it is not required for this bugfix. How about doing it in a 
follow-up ticket?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to