Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/22326#discussion_r215890040
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
---
@@ -1202,15 +1243,38 @@ object PushPredicateThroughJoin extends
Rule[LogicalPlan] with PredicateHelper {
split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right)
joinType match {
- case _: InnerLike | LeftSemi =>
- // push down the single-side-only join filter for both sides' sub-queries
- val newLeft = leftJoinConditions.
- reduceLeftOption(And).map(Filter(_, left)).getOrElse(left)
- val newRight = rightJoinConditions.
- reduceLeftOption(And).map(Filter(_, right)).getOrElse(right)
- val newJoinCond = commonJoinCondition.reduceLeftOption(And)
+ case LeftSemi =>
+ val (newLeft, newRight, newJoinCond, others) =
getNewChildAndSplitCondForJoin(
+ j, leftJoinConditions, rightJoinConditions,
commonJoinCondition)
+ // need to add cross join when unevaluable condition exists
+ val newJoinType = if (others.nonEmpty) {
+ tryToGetCrossType(commonJoinCondition, j)
+ } else {
+ joinType
+ }
- Join(newLeft, newRight, joinType, newJoinCond)
+ val join = Join(newLeft, newRight, newJoinType, newJoinCond)
+ if (others.nonEmpty) {
+ Project(newLeft.output.map(_.toAttribute),
Filter(others.reduceLeft(And), join))
+ } else {
+ join
+ }
+ case _: InnerLike =>
+ val (newLeft, newRight, newJoinCond, others) =
getNewChildAndSplitCondForJoin(
+ j, leftJoinConditions, rightJoinConditions,
commonJoinCondition)
+ // only need to add a cross join when the whole commonJoinCondition is unevaluable
+ val newJoinType = if (commonJoinCondition.nonEmpty &&
newJoinCond.isEmpty) {
--- End diff --
Hmm... why do we have this check here, while later for the filter we check
`others.nonEmpty`? Shouldn't they be the same?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]