Github user marmbrus commented on a diff in the pull request:
https://github.com/apache/spark/pull/482#discussion_r12263088
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
---
@@ -87,6 +88,75 @@ object ColumnPruning extends Rule[LogicalPlan] {
/**
* Replaces [[catalyst.expressions.Expression Expressions]] that can be
statically evaluated with
+ * equivalent [[catalyst.expressions.Literal Literal]] values. This rule
is more specific with
+ * Null value propagation from bottom to top of the expression tree.
+ */
+object NullPropagation extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+ case q: LogicalPlan => q transformExpressionsUp {
+ // Skip redundant folding of literals.
+ case l: Literal => l
+ case e @ Count(Literal(null, _)) => Literal(0, e.dataType)
+ case e @ Sum(Literal(c, _)) if(c == 0) => Literal(0, e.dataType)
+ case e @ Average(Literal(c, _)) if(c == 0) => Literal(0.0,
e.dataType)
+ case e @ IsNull(c) if c.nullable == false => Literal(false,
BooleanType)
+ case e @ IsNotNull(c) if c.nullable == false => Literal(true,
BooleanType)
+ case e @ GetItem(Literal(null, _), _) => Literal(null, e.dataType)
+ case e @ GetItem(_, Literal(null, _)) => Literal(null, e.dataType)
+ case e @ GetField(Literal(null, _), _) => Literal(null, e.dataType)
+ case e @ Coalesce(children) => {
+ val newChildren = children.filter(c => c match {
+ case Literal(null, _) => false
+ case _ => true
+ })
+ if(newChildren.length == 0) {
+ Literal(null, e.dataType)
+ } else if(newChildren.length == 1) {
+ newChildren(0)
+ } else {
+ Coalesce(newChildren)
+ }
+ }
+ case e @ If(Literal(v, _), trueValue, falseValue) => if(v == true)
trueValue else falseValue
+ case e @ In(Literal(v, _), list) if(list.exists(c => c match {
+ case Literal(candidate, _) if(candidate == v) => true
+ case _ => false
+ })) => Literal(true, BooleanType)
+ case e: UnaryMinus => e.child match {
+ case Literal(null, _) => Literal(null, e.dataType)
+ case _ => e
+ }
+ case e: Cast => e.child match {
+ case Literal(null, _) => Literal(null, e.dataType)
+ case _ => e
+ }
+ case e: Not => e.child match {
+ case Literal(null, _) => Literal(null, e.dataType)
+ case _ => e
+ }
+ case e: And => e // leave it for BooleanSimplification
+ case e: Or => e // leave it for BooleanSimplification
+ // Put exceptional cases above
+ case e: BinaryArithmetic => e.children match {
+ case Literal(null, _) :: right :: Nil => Literal(null, e.dataType)
+ case left :: Literal(null, _) :: Nil => Literal(null, e.dataType)
+ case _ => e
+ }
+ case e: BinaryPredicate => e.children match {
--- End diff --
If you match on `BinaryComparison` instead of `BinaryPredicate` you won't
need to skip `And` and `Or` above, which seems a little clearer.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---