cloud-fan commented on code in PR #41763:
URL: https://github.com/apache/spark/pull/41763#discussion_r1345268202
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala:
##########
@@ -140,4 +144,80 @@ object ExprUtils extends QueryErrorsBase {
TypeCheckSuccess
}
}
+
+ def assertValidAggregation(a: Aggregate): Unit = {
+ def checkValidAggregateExpression(expr: Expression): Unit = expr match {
+ case expr: AggregateExpression =>
+ val aggFunction = expr.aggregateFunction
+ aggFunction.children.foreach {
+ child =>
+ child.foreach {
+ case expr: AggregateExpression =>
+ expr.failAnalysis(
+ errorClass = "NESTED_AGGREGATE_FUNCTION",
+ messageParameters = Map.empty)
+ case other => // OK
+ }
+
+ if (!child.deterministic) {
+ child.failAnalysis(
+ errorClass =
"AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION",
+ messageParameters = Map("sqlExpr" -> toSQLExpr(expr)))
+ }
+ }
+ case _: Attribute if a.groupingExpressions.isEmpty =>
+ a.failAnalysis(
+ errorClass = "MISSING_GROUP_BY",
+ messageParameters = Map.empty)
+ case e: Attribute if !a.groupingExpressions.exists(_.semanticEquals(e))
=>
+ throw QueryCompilationErrors.columnNotInGroupByClauseError(e)
+ case s: ScalarSubquery
+ if s.children.nonEmpty &&
!a.groupingExpressions.exists(_.semanticEquals(s)) =>
+ s.failAnalysis(
+ errorClass = "SCALAR_SUBQUERY_IS_IN_GROUP_BY_OR_AGGREGATE_FUNCTION",
+ messageParameters = Map("sqlExpr" -> toSQLExpr(s)))
+ case e if a.groupingExpressions.exists(_.semanticEquals(e)) => // OK
+ // There should be no Window in Aggregate - this case will fail later
check anyway.
+ // Perform this check for special case of lateral column alias, when the
window
+ // expression is not eligible to propagate to upper plan because it is
not valid,
+ // containing non-group-by or non-aggregate-expressions.
+ case WindowExpression(function, spec) =>
+ function.children.foreach(checkValidAggregateExpression)
+ checkValidAggregateExpression(spec)
+ case e => e.children.foreach(checkValidAggregateExpression)
+ }
+
+ def checkValidGroupingExprs(expr: Expression): Unit = {
+ if (expr.exists(_.isInstanceOf[AggregateExpression])) {
+ expr.failAnalysis(
+ errorClass = "GROUP_BY_AGGREGATE",
+ messageParameters = Map("sqlExpr" -> expr.sql))
+ }
+
+ // Check if the data type of expr is orderable.
+ if (!RowOrdering.isOrderable(expr.dataType)) {
+ expr.failAnalysis(
+ errorClass = "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE",
+ messageParameters = Map(
+ "sqlExpr" -> toSQLExpr(expr),
+ "dataType" -> toSQLType(expr.dataType)))
+ }
+
+ if (!expr.deterministic) {
+ // This is just a sanity check, our analysis rule
PullOutNondeterministic should
+ // already pull out those nondeterministic expressions and evaluate
them in
+ // a Project node.
+ throw SparkException.internalError(
+ msg = s"Non-deterministic expression '${
+ toSQLExpr(expr)
+ }' should not appear in " +
+ "grouping expression.",
Review Comment:
```suggestion
msg = s"Non-deterministic expression '${toSQLExpr(expr)}' should
not appear in " +
"grouping expression.",
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]