Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/11665#discussion_r55917538 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala --- @@ -598,50 +598,44 @@ object NullPropagation extends Rule[LogicalPlan] { } /** - * Attempts to eliminate reading (unnecessary) NULL values if they are not required for correctness - * by inserting isNotNull filters in the query plan. These filters are currently inserted beneath - * existing Filters and Join operators and are inferred based on their data constraints. + * Eliminate reading unnecessary values if they are not required for correctness (and can help in + * optimizing the query) by inserting relevant filters in the query plan based on an operator's + * data constraints. These filters are currently inserted to the existing conditions in the Filter + * operators and on either side of Join operators. * * Note: While this optimization is applicable to all types of join, it primarily benefits Inner and * LeftSemi joins. */ -object NullFiltering extends Rule[LogicalPlan] with PredicateHelper { +object InferFiltersFromConstraints extends Rule[LogicalPlan] with PredicateHelper { + // We generate a list of additional filters from the operator's existing constraint but remove + // those that are either already part of the operator's condition or are part of the operator's + // child constraints. def apply(plan: LogicalPlan): LogicalPlan = plan transform { case filter @ Filter(condition, child) => - // We generate a list of additional isNotNull filters from the operator's existing constraints - // but remove those that are either already part of the filter condition or are part of the - // operator's child constraints. - val newIsNotNullConstraints = filter.constraints.filter(_.isInstanceOf[IsNotNull]) -- + // For the Filter operator, we try to generate additional filters by only inferring the + // IsNotNull constraints. These IsNotNull filters are then used while generating the + // physical plan to quickly short circuit the null checks in the generated code. + val newFilters = filter.constraints.filter(_.isInstanceOf[IsNotNull]) -- (child.constraints ++ splitConjunctivePredicates(condition)) - if (newIsNotNullConstraints.nonEmpty) { - Filter(And(newIsNotNullConstraints.reduce(And), condition), child) + if (newFilters.nonEmpty) { + Filter(And(newFilters.reduce(And), condition), child) } else { filter } - case join @ Join(left, right, joinType, condition) => - val leftIsNotNullConstraints = join.constraints - .filter(_.isInstanceOf[IsNotNull]) - .filter(_.references.subsetOf(left.outputSet)) -- left.constraints - val rightIsNotNullConstraints = - join.constraints - .filter(_.isInstanceOf[IsNotNull]) - .filter(_.references.subsetOf(right.outputSet)) -- right.constraints - val newLeftChild = if (leftIsNotNullConstraints.nonEmpty) { - Filter(leftIsNotNullConstraints.reduce(And), left) - } else { - left - } - val newRightChild = if (rightIsNotNullConstraints.nonEmpty) { - Filter(rightIsNotNullConstraints.reduce(And), right) - } else { - right - } - if (newLeftChild != left || newRightChild != right) { - Join(newLeftChild, newRightChild, joinType, condition) - } else { - join + case join@Join(left, right, joinType, conditionOpt) => --- End diff -- Nit: `join @ Join`
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org