viirya commented on a change in pull request #26918: [SPARK-30279][SQL] Support
32 or more grouping attributes for GROUPING_ID
URL: https://github.com/apache/spark/pull/26918#discussion_r359170592
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -563,15 +570,44 @@ class Analyzer(
}
}
+ private object EmptyGroupingIDExtractor {
+
+ private def hasEmptyGroupingID(aggExprs: Seq[Expression]): Boolean = {
+ aggExprs.exists { p => p.collectFirst { case GroupingID(Nil) => true
}.isDefined }
+ }
+
+ def unapply(p: LogicalPlan): Option[Seq[Expression]] = p match {
+ case Aggregate(Seq(Cube(groupByExprs)), aggExprs, _) if
hasEmptyGroupingID(aggExprs) =>
+ Some(constructGroupBy(cubeExprs(groupByExprs)))
+ case Aggregate(Seq(Rollup(groupByExprs)), aggExprs, _) if
hasEmptyGroupingID(aggExprs) =>
+ Some(constructGroupBy(rollupExprs(groupByExprs)))
+ case x: GroupingSets if hasEmptyGroupingID(x.aggregations) =>
+ Some(constructGroupBy(x.selectedGroupByExprs))
+ case Filter(cond, child) if hasEmptyGroupingID(cond :: Nil) =>
+ Some(findGroupingExprs(child))
+ case Sort(order, _, child) if hasEmptyGroupingID(order) =>
+ Some(findGroupingExprs(child))
+ case _ =>
+ None
+ }
+ }
+
// This require transformUp to replace grouping()/grouping_id() in
resolved Filter/Sort
def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
case a if !a.childrenResolved => a // be sure all of the children are
resolved.
+ // If a plan has unresolved grouping IDs, we resolve them first
+ case p @ EmptyGroupingIDExtractor(groupByExprs) =>
+ p.transformExpressions {
+ // In case that `GroupingID` has an empty group-by expressions
+ case gid: GroupingID if !gid.resolved => GroupingID(groupByExprs)
+ }
Review comment:
Why is this needed? We already decide the gid data type via
`GroupingID.groupIdDataType` above; isn't that enough?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]