agubichev commented on code in PR #41301:
URL: https://github.com/apache/spark/pull/41301#discussion_r1267038310
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala:
##########
@@ -804,18 +804,67 @@ object DecorrelateInnerQuery extends PredicateHelper {
(d.copy(child = newChild), joinCond, outerReferenceMap)
case j @ Join(left, right, joinType, condition, _) =>
- val outerReferences = collectOuterReferences(j.expressions)
- // Join condition containing outer references is not supported.
- assert(outerReferences.isEmpty, s"Correlated column is not allowed
in join: $j")
- val newOuterReferences = parentOuterReferences ++ outerReferences
- val shouldPushToLeft = joinType match {
+ def splitCorrelatedPredicate(condition: Option[Expression],
+ isInnerJoin: Boolean,
+ shouldDecorrelatePredicates: Boolean):
+ (Seq[Expression], Seq[Expression], Seq[Expression],
+ Seq[Expression], AttributeMap[Attribute]) = {
+ // Similar to Filters above, we split the join condition (if
present) into correlated
+ // and uncorrelated predicates, and separately handle joins
under set and aggregation
+ // operations.
+ if (shouldDecorrelatePredicates) {
+ val conditions =
+ if (condition.isDefined)
splitConjunctivePredicates(condition.get)
+ else Seq.empty[Expression]
+ val (correlated, uncorrelated) =
conditions.partition(containsOuter)
+ val equivalences =
+ if (underSetOp) AttributeMap.empty[Attribute]
+ else collectEquivalentOuterReferences(correlated)
+ var (equalityCond, predicates) =
+ if (underSetOp) (Seq.empty[Expression], correlated)
+ else correlated.partition(canPullUpOverAgg)
+ // Fully preserve the join predicate for non-inner joins.
+ if (!isInnerJoin) {
+ predicates = predicates ++ equalityCond
+ }
+ (correlated, uncorrelated, equalityCond, predicates,
equivalences)
+ } else {
+ (Seq.empty[Expression],
+ if (condition.isEmpty) Seq.empty[Expression] else
Seq(condition.get),
+ Seq.empty[Expression],
+ Seq.empty[Expression],
+ AttributeMap.empty[Attribute])
+ }
+ }
+
+ val shouldDecorrelatePredicates =
+ SQLConf.get.getConf(SQLConf.DECORRELATE_JOIN_PREDICATE_ENABLED)
+ if (!shouldDecorrelatePredicates) {
+ val outerReferences = collectOuterReferences(j.expressions)
+ // Join condition containing outer references is not supported.
+ assert(outerReferences.isEmpty, s"Correlated column is not
allowed in join: $j")
+ }
+ val (correlated, uncorrelated, equalityCond, predicates,
equivalences) =
+ splitCorrelatedPredicate(condition, joinType == Inner,
shouldDecorrelatePredicates)
+ val outerReferences = collectOuterReferences(j.expressions) ++
+ collectOuterReferences(predicates)
+ val newOuterReferences =
+ parentOuterReferences ++ outerReferences -- equivalences.keySet
+ var shouldPushToLeft = joinType match {
case LeftOuter | LeftSemiOrAnti(_) | FullOuter => true
case _ => hasOuterReferences(left)
}
val shouldPushToRight = joinType match {
case RightOuter | FullOuter => true
case _ => hasOuterReferences(right)
}
+ if (shouldDecorrelatePredicates && !shouldPushToLeft &&
!shouldPushToRight
+ && !correlated.isEmpty) {
Review Comment:
Good catch!
Note that there would not be any domain join even with `correlated.isEmpty`,
as `newOuterReferences` would have the references involved in equivalences
removed (a few lines above this one).
However, with this change it is a bit more explicit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]