Github user davies commented on a diff in the pull request:
https://github.com/apache/spark/pull/10567#discussion_r53433203
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
---
@@ -932,6 +933,66 @@ object ReorderJoin extends Rule[LogicalPlan] with
PredicateHelper {
}
/**
+ * Elimination of outer joins, if the predicates can restrict the result
sets so that
+ * all null-supplying rows are eliminated
+ *
+ * - full outer -> inner if both sides have such predicates
+ * - left outer -> inner if the right side has such predicates
+ * - right outer -> inner if the left side has such predicates
+ * - full outer -> left outer if only the left side has such predicates
+ * - full outer -> right outer if only the right side has such predicates
+ *
+ * This rule should be executed before pushing down the Filter
+ */
+object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper
{
+
+ /**
+ * Returns whether the expression returns null or false when all inputs
are nulls.
+ */
+ private def canFilterOutNull(e: Expression): Boolean = {
+ val attributes = e.references.toSeq
+ val emptyRow = new GenericInternalRow(attributes.length)
+ val v = BindReferences.bindReference(e, attributes).eval(emptyRow)
+ v == null || v == false
+ }
+
+ private def buildNewJoin(filter: Filter, join: Join): Join = {
+ val splitConjunctiveConditions: Seq[Expression] =
splitConjunctivePredicates(filter.condition)
+ val leftConditions = splitConjunctiveConditions
+ .filter(_.references.subsetOf(join.left.outputSet))
+ val rightConditions = splitConjunctiveConditions
+ .filter(_.references.subsetOf(join.right.outputSet))
+
+ val leftHasNonNullPredicate = leftConditions.exists(canFilterOutNull)
||
+ filter.constraints.filter(_.isInstanceOf[IsNotNull])
+ .exists(expr =>
join.left.outputSet.intersect(expr.references).nonEmpty)
--- End diff --
This is not correct: IsNotNull(Add(A, B)) does not mean that both A and B
must be non-null.
I think we don't need these constraints.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to have it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]