cloud-fan commented on code in PR #45133:
URL: https://github.com/apache/spark/pull/45133#discussion_r1510765933
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala:
##########
@@ -248,24 +248,76 @@ object RewritePredicateSubquery extends Rule[LogicalPlan]
with PredicateHelper {
case u: UnaryNode if u.expressions.exists(
SubqueryExpression.hasInOrCorrelatedExistsSubquery) =>
var newChild = u.child
- u.mapExpressions(expr => {
- val (newExpr, p) = rewriteExistentialExpr(Seq(expr), newChild)
+ var introducedAttrs = Seq.empty[Attribute]
+ val updatedNode = u.mapExpressions(expr => {
+ val (newExpr, p, newAttrs) =
rewriteExistentialExprWithAttrs(Seq(expr), newChild)
newChild = p
+ introducedAttrs ++= newAttrs
// The newExpr can not be None
newExpr.get
}).withNewChildren(Seq(newChild))
+ updatedNode match {
+ case a: Aggregate =>
+ // If we have introduced new `exists`-attributes that:
+ // 1) are referenced by aggregateExpressions within a
non-aggregateFunction expression
+ // 2) are not referenced by groupingExpressions
+ // we wrap them in first() aggregate function. first() is Spark's
executable version of
+ // any_value() aggregate function.
+ // We do this to keep the aggregation valid, i.e avoid references
outside of aggregate
+ // functions that are not in grouping expressions.
+ // Here, the value of `exists` is functionally determined by
grouping expressions, so
+ // applying any aggregate function is semantically safe.
+ val aggFunctionReferences = a.aggregateExpressions.
+ flatMap(extractAggregateExpressions).
+ flatMap(_.references).toSet
+ val nonAggFuncReferences =
+
a.aggregateExpressions.flatMap(_.references).filterNot(aggFunctionReferences.contains)
+ val groupingReferences = a.groupingExpressions.flatMap(_.references)
Review Comment:
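This looks incorrect. For example, `SELECT a ... GROUP BY a + b` is invalid even
though `a` is a grouping reference: only the whole expression `a + b` is grouped,
so `a` on its own is not a valid output of the aggregate.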
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]