Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/23139#discussion_r236150260
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
---
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists,
ArrayFilter, CaseWhen, Expression, If}
+import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal,
MapFilter, Or}
+import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join,
LogicalPlan}
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.types.BooleanType
+
+
+/**
+ * A rule that replaces `Literal(null, BooleanType)` with `FalseLiteral`,
if possible, in the search
+ * condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an
implicit Boolean operator
+ * "(search condition) = TRUE". The replacement is only valid when
`Literal(null, BooleanType)` is
+ * semantically equivalent to `FalseLiteral` when evaluating the whole
search condition.
+ *
+ * Please note that FALSE and NULL are not exchangeable in most cases,
when the search condition
+ * contains NOT and NULL-tolerant expressions. Thus, the rule is very
conservative and applicable
+ * in very limited cases.
+ *
+ * For example, `Filter(Literal(null, BooleanType))` is equal to
`Filter(FalseLiteral)`.
+ *
+ * Another example containing branches is `Filter(If(cond, FalseLiteral,
Literal(null, _)))`;
+ * this can be optimized to `Filter(If(cond, FalseLiteral,
FalseLiteral))`, and eventually
+ * `Filter(FalseLiteral)`.
+ *
+ * Moreover, this rule also transforms predicates in all [[If]]
expressions as well as branch
+ * conditions in all [[CaseWhen]] expressions, even if they are not part
of the search conditions.
+ *
+ * For example, `Project(If(And(cond, Literal(null)), Literal(1),
Literal(2)))` can be simplified
+ * into `Project(Literal(2))`.
+ */
+object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
+
+ def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+ case f @ Filter(cond, _) => f.copy(condition =
replaceNullWithFalse(cond))
+ case j @ Join(_, _, _, Some(cond)) => j.copy(condition =
Some(replaceNullWithFalse(cond)))
+ case p: LogicalPlan => p transformExpressions {
+ case i @ If(pred, _, _) => i.copy(predicate =
replaceNullWithFalse(pred))
+ case cw @ CaseWhen(branches, _) =>
+ val newBranches = branches.map { case (cond, value) =>
+ replaceNullWithFalse(cond) -> value
+ }
+ cw.copy(branches = newBranches)
+ case af @ ArrayFilter(_, lf @ LambdaFunction(func, _, _)) =>
+ val newLambda = lf.copy(function = replaceNullWithFalse(func))
+ af.copy(function = newLambda)
+ case ae @ ArrayExists(_, lf @ LambdaFunction(func, _, _)) =>
+ val newLambda = lf.copy(function = replaceNullWithFalse(func))
+ ae.copy(function = newLambda)
+ case mf @ MapFilter(_, lf @ LambdaFunction(func, _, _)) =>
+ val newLambda = lf.copy(function = replaceNullWithFalse(func))
+ mf.copy(function = newLambda)
+ }
+ }
+
+ /**
+ * Recursively traverse the Boolean-type expression to replace
+ * `Literal(null, BooleanType)` with `FalseLiteral`, if possible.
+ *
+ * Note that `transformExpressionsDown` can not be used here as we must
stop as soon as we hit
+ * an expression that is not [[CaseWhen]], [[If]], [[And]], [[Or]] or
+ * `Literal(null, BooleanType)`.
+ */
+ private def replaceNullWithFalse(e: Expression): Expression = {
+ if (e.dataType != BooleanType) {
--- End diff --
We don't handle `LambdaFunction` inside this method; that is done on the caller side.
This method only deals with boolean expressions, and it returns the original
expression unchanged if the expression is not boolean:
https://github.com/apache/spark/pull/23139/files#diff-0bb4fc0a3c867b855f84dd1db8867139R103
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]