stefankandic commented on code in PR #56357:
URL: https://github.com/apache/spark/pull/56357#discussion_r3371665620
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -3073,16 +3073,64 @@ class Analyzer(
exprs: Seq[Expression],
agg: Aggregate): (Seq[NamedExpression], Seq[Expression]) = {
val extraAggExprs = new LinkedHashMap[Expression, NamedExpression]
+ val expandedToOriginalMap = buildExpandedToOriginalAttributeMap(agg)
val transformed = exprs.map { e =>
if (!e.resolved) {
e
} else {
- buildAggExprList(e, agg, extraAggExprs)
+ val rewritten = if (expandedToOriginalMap.nonEmpty) {
+ rewriteExpandedAttributesInAggregates(e, expandedToOriginalMap)
+ } else {
+ e
+ }
+ buildAggExprList(rewritten, agg, extraAggExprs)
}
}
(extraAggExprs.values().asScala.toSeq, transformed)
}
+ /**
+ * For GROUPING SETS / CUBE / ROLLUP, the plan below the Aggregate is
+ * Aggregate -> Expand -> Project -> child. The Project introduces aliases
for the grouping
+ * columns, and the Expand produces new attributes that replace them. This
method builds a
+ * mapping from expanded attribute ExprIds back to the original
pre-expansion attributes, so
+ * that aggregate functions in HAVING / ORDER BY can reference the
original columns rather than
+ * the expanded ones (which would fail to resolve against the Aggregate's
child). Expression
+ * keys (e.g. `a + 1`) are safely skipped because aggregate functions
reference the
+ * passthrough column, not the Expand-output expression attribute.
+ */
+ private def buildExpandedToOriginalAttributeMap(
+ agg: Aggregate): Map[ExprId, AttributeReference] = {
+ agg.child match {
Review Comment:
Looking at the `GroupingAnalyticsTransformer.scala` the `Filter` should
never end up between these nodes. I added a new test case just in case (`WHERE`
+ `ORDER BY SUM(...)`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]