Github user yhuai commented on a diff in the pull request:
https://github.com/apache/spark/pull/9815#discussion_r45297166
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
---
@@ -211,45 +211,31 @@ class Analyzer(
GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations)
case x: GroupingSets =>
val gid = AttributeReference(VirtualColumn.groupingIdName,
IntegerType, false)()
- // We will insert another Projection if the GROUP BY keys contains
the
- // non-attribute expressions. And the top operators can references
those
- // expressions by its alias.
- // e.g. SELECT key%5 as c1 FROM src GROUP BY key%5 ==>
- // SELECT a as c1 FROM (SELECT key%5 AS a FROM src) GROUP BY a
-
- // find all of the non-attribute expressions in the GROUP BY keys
- val nonAttributeGroupByExpressions = new ArrayBuffer[Alias]()
-
- // The pair of (the original GROUP BY key, associated attribute)
- val groupByExprPairs = x.groupByExprs.map(_ match {
- case e: NamedExpression => (e, e.toAttribute)
- case other => {
- val alias = Alias(other, other.toString)()
- nonAttributeGroupByExpressions += alias // add the
non-attributes expression alias
- (other, alias.toAttribute)
- }
- })
-
- // substitute the non-attribute expressions for aggregations.
- val aggregation = x.aggregations.map(expr => expr.transformDown {
- case e =>
groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e)
- }.asInstanceOf[NamedExpression])
- // substitute the group by expressions.
- val newGroupByExprs = groupByExprPairs.map(_._2)
+ // Expand works by setting grouping expressions to null as
determined by the bitmasks. To
+ // prevent these null values from being used in an aggregate
instead of the original value
+ // we need to create new aliases for all group by expressions that
will only be used for
+ // the intended purpose.
+ val groupByAliases: Seq[Alias] = x.groupByExprs.map {
+ case e: NamedExpression => Alias(e, e.name)()
+ case other => Alias(other, other.toString)()
+ }
- val child = if (nonAttributeGroupByExpressions.length > 0) {
- // insert additional projection if contains the
- // non-attribute expressions in the GROUP BY keys
- Project(x.child.output ++ nonAttributeGroupByExpressions,
x.child)
- } else {
- x.child
+ val aggregations: Seq[NamedExpression] = x.aggregations.map {
+ // Don't transform AggregateExpressions see SPARK-11275
--- End diff --
Can you explain this comment in more detail?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]