agubichev commented on code in PR #41301:
URL: https://github.com/apache/spark/pull/41301#discussion_r1267038851
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala:
##########
@@ -804,18 +804,67 @@ object DecorrelateInnerQuery extends PredicateHelper {
(d.copy(child = newChild), joinCond, outerReferenceMap)
case j @ Join(left, right, joinType, condition, _) =>
- val outerReferences = collectOuterReferences(j.expressions)
- // Join condition containing outer references is not supported.
- assert(outerReferences.isEmpty, s"Correlated column is not allowed
in join: $j")
- val newOuterReferences = parentOuterReferences ++ outerReferences
- val shouldPushToLeft = joinType match {
+ def splitCorrelatedPredicate(condition: Option[Expression],
+ isInnerJoin: Boolean,
+ shouldDecorrelatePredicates: Boolean):
+ (Seq[Expression], Seq[Expression], Seq[Expression],
+ Seq[Expression], AttributeMap[Attribute]) = {
+ // Similar to Filters above, we split the join condition (if
present) into correlated
+ // and uncorrelated predicates, and separately handle joins
under set and aggregation
+ // operations.
+ if (shouldDecorrelatePredicates) {
+ val conditions =
+ if (condition.isDefined)
splitConjunctivePredicates(condition.get)
+ else Seq.empty[Expression]
+ val (correlated, uncorrelated) =
conditions.partition(containsOuter)
+ val equivalences =
+ if (underSetOp) AttributeMap.empty[Attribute]
+ else collectEquivalentOuterReferences(correlated)
+ var (equalityCond, predicates) =
+ if (underSetOp) (Seq.empty[Expression], correlated)
+ else correlated.partition(canPullUpOverAgg)
Review Comment:
Not sure I follow exactly. We still need to split the `correlated` predicates
into two sets (equalities and everything else), right?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala:
##########
@@ -804,18 +804,67 @@ object DecorrelateInnerQuery extends PredicateHelper {
(d.copy(child = newChild), joinCond, outerReferenceMap)
case j @ Join(left, right, joinType, condition, _) =>
- val outerReferences = collectOuterReferences(j.expressions)
- // Join condition containing outer references is not supported.
- assert(outerReferences.isEmpty, s"Correlated column is not allowed
in join: $j")
- val newOuterReferences = parentOuterReferences ++ outerReferences
- val shouldPushToLeft = joinType match {
+ def splitCorrelatedPredicate(condition: Option[Expression],
+ isInnerJoin: Boolean,
+ shouldDecorrelatePredicates: Boolean):
+ (Seq[Expression], Seq[Expression], Seq[Expression],
+ Seq[Expression], AttributeMap[Attribute]) = {
+ // Similar to Filters above, we split the join condition (if
present) into correlated
+ // and uncorrelated predicates, and separately handle joins
under set and aggregation
+ // operations.
+ if (shouldDecorrelatePredicates) {
+ val conditions =
+ if (condition.isDefined)
splitConjunctivePredicates(condition.get)
+ else Seq.empty[Expression]
+ val (correlated, uncorrelated) =
conditions.partition(containsOuter)
+ val equivalences =
+ if (underSetOp) AttributeMap.empty[Attribute]
+ else collectEquivalentOuterReferences(correlated)
+ var (equalityCond, predicates) =
+ if (underSetOp) (Seq.empty[Expression], correlated)
+ else correlated.partition(canPullUpOverAgg)
+ // Fully preserve the join predicate for non-inner joins.
+ if (!isInnerJoin) {
+ predicates = predicates ++ equalityCond
Review Comment:
Done
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala:
##########
@@ -804,18 +804,67 @@ object DecorrelateInnerQuery extends PredicateHelper {
(d.copy(child = newChild), joinCond, outerReferenceMap)
case j @ Join(left, right, joinType, condition, _) =>
- val outerReferences = collectOuterReferences(j.expressions)
- // Join condition containing outer references is not supported.
- assert(outerReferences.isEmpty, s"Correlated column is not allowed
in join: $j")
- val newOuterReferences = parentOuterReferences ++ outerReferences
- val shouldPushToLeft = joinType match {
+ def splitCorrelatedPredicate(condition: Option[Expression],
+ isInnerJoin: Boolean,
+ shouldDecorrelatePredicates: Boolean):
+ (Seq[Expression], Seq[Expression], Seq[Expression],
+ Seq[Expression], AttributeMap[Attribute]) = {
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]