cloud-fan commented on a change in pull request #31913:
URL: https://github.com/apache/spark/pull/31913#discussion_r612560780
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##########
@@ -793,6 +817,56 @@ case class Aggregate(
copy(child = newChild)
}
+object Aggregate {
+ private def collectComplexGroupingExpressions(groupingExpressions:
Seq[Expression]) = {
+ groupingExpressions.zipWithIndex
+ .foldLeft(mutable.Map.empty[Expression, (Expression, Int)]) {
+ case (m, (ge, i)) =>
+ if (!ge.foldable && ge.children.nonEmpty &&
!m.contains(ge.canonicalized)) {
+ m += ge.canonicalized -> (ge, i)
+ }
+ m
+ }
+ }
+
+ private def insertGroupingReferences(
+ aggregateExpressions: Seq[NamedExpression],
+ groupingExpressions: collection.Map[Expression, (Expression, Int)]):
Seq[NamedExpression] = {
+ def insertGroupingExprRefs(e: Expression): Expression = {
+ e match {
+ case _ if !e.deterministic => e
+ case _: AggregateExpression => e
+ case _ if PythonUDF.isGroupedAggPandasUDF(e) => e
+ case _ if groupingExpressions.contains(e.canonicalized) =>
+ val (groupingExpression, ordinal) =
groupingExpressions(e.canonicalized)
+ GroupingExprRef(ordinal, groupingExpression.dataType,
groupingExpression.nullable)
+ case _ => e.mapChildren(insertGroupingExprRefs)
+ }
+ }
+
+
aggregateExpressions.map(insertGroupingExprRefs(_).asInstanceOf[NamedExpression])
+ }
+
+ def withGroupingRefs(
+ groupingExpressions: Seq[Expression],
+ aggregateExpressions: Seq[NamedExpression],
+ child: LogicalPlan): Aggregate = {
+ val dealiasedGroupingExpressions = groupingExpressions.map {
+ case a: Alias => a.child
+ case o => o
+ }
+ val complexGroupingExpressions =
+ collectComplexGroupingExpressions(dealiasedGroupingExpressions)
+ val aggrExprWithGroupingReferences = if
(complexGroupingExpressions.nonEmpty) {
+ insertGroupingReferences(aggregateExpressions,
complexGroupingExpressions)
Review comment:
I agree that it's not necessary to replace `Attribute` with
`GroupingExprRef`, but the framework would be more consistent if we always used
`GroupingExprRef`. We can make `GroupingExprRef` a named expression to fix any
issues that arise from doing so.
This is just my opinion, though; I'm happy to hear what others think.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]