Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/16954#discussion_r103337724
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
---
@@ -2512,3 +2522,67 @@ object ResolveCreateNamedStruct extends
Rule[LogicalPlan] {
CreateNamedStruct(children.toList)
}
}
+
+/**
+ * The aggregate expressions from subquery referencing outer query block
are pushed
+ * down to the outer query block for evaluation. This rule below updates
such outer references
+ * as AttributeReference referring attributes from the parent/outer query
block.
+ *
+ * For example (SQL):
+ * {{{
+ * SELECT l.a FROM l GROUP BY 1 HAVING EXISTS (SELECT 1 FROM r WHERE r.d
< min(l.b))
+ * }}}
+ * Plan before the rule.
+ * Project [a#226]
+ * +- Filter exists#245 [min(b#227)#249]
+ * : +- Project [1 AS 1#247]
+ * : +- Filter (d#238 < min(outer(b#227))) <-----
+ * : +- SubqueryAlias r
+ * : +- Project [_1#234 AS c#237, _2#235 AS d#238]
+ * : +- LocalRelation [_1#234, _2#235]
+ * +- Aggregate [a#226], [a#226, min(b#227) AS min(b#227)#249]
+ * +- SubqueryAlias l
+ * +- Project [_1#223 AS a#226, _2#224 AS b#227]
+ * +- LocalRelation [_1#223, _2#224]
+ * Plan after the rule.
+ * Project [a#226]
+ * +- Filter exists#245 [min(b#227)#249]
+ * : +- Project [1 AS 1#247]
+ * : +- Filter (d#238 < outer(min(b#227)#249)) <-----
+ * : +- SubqueryAlias r
+ * : +- Project [_1#234 AS c#237, _2#235 AS d#238]
+ * : +- LocalRelation [_1#234, _2#235]
+ * +- Aggregate [a#226], [a#226, min(b#227) AS min(b#227)#249]
+ * +- SubqueryAlias l
+ * +- Project [_1#223 AS a#226, _2#224 AS b#227]
+ * +- LocalRelation [_1#223, _2#224]
+ */
+object UpdateOuterReferences extends Rule[LogicalPlan] {
+ private def stripAlias(expr: Expression): Expression = expr match { case
a: Alias => a.child }
+
+ private def updateOuterReferenceInSubquery(
+ plan: LogicalPlan,
+ refExprs: Seq[Expression]): LogicalPlan = {
+ plan transformAllExpressions { case e =>
+ val outerAlias =
+
refExprs.find(stripAlias(_).semanticEquals(SubExprUtils.stripOuterReference(e)))
+ outerAlias match {
+ case Some(a: Alias) => OuterReference(a.toAttribute)
+ case _ => e
+ }
+ }
+ }
+
+ def apply(plan: LogicalPlan): LogicalPlan = {
+ plan transform {
+ case f @ Filter(_, a: Aggregate) if f.resolved =>
--- End diff --
This only works with aggregates that are already in the `Aggregate`
operator; this seems like a regression. What does Hive do?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]