Repository: spark
Updated Branches:
  refs/heads/master d45e0c6b9 -> 9a2448daf


[SPARK-2052] [SQL] Add optimization for CaseConversionExpression's.

Add optimization for `CaseConversionExpression`'s.

Author: Takuya UESHIN <ues...@happy-camper.st>

Closes #990 from ueshin/issues/SPARK-2052 and squashes the following commits:

2568666 [Takuya UESHIN] Move some rules back.
dde7ede [Takuya UESHIN] Add tests to check if ConstantFolding can handle null 
literals and remove the unneeded rules from NullPropagation.
c4eea67 [Takuya UESHIN] Fix toString methods.
23e2363 [Takuya UESHIN] Make CaseConversionExpressions foldable if the child is 
foldable.
0ff7568 [Takuya UESHIN] Add tests for collapsing case statements.
3977d80 [Takuya UESHIN] Add optimization for CaseConversionExpression's.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9a2448da
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9a2448da
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9a2448da

Branch: refs/heads/master
Commit: 9a2448daf984d5bb550dfe0d9e28cbb80ef5cb51
Parents: d45e0c6
Author: Takuya UESHIN <ues...@happy-camper.st>
Authored: Wed Jun 11 17:58:35 2014 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Wed Jun 11 17:58:35 2014 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/stringOperations.scala |  7 +-
 .../sql/catalyst/optimizer/Optimizer.scala      | 30 ++++---
 .../optimizer/ConstantFoldingSuite.scala        | 61 ++++++++++++-
 ...SimplifyCaseConversionExpressionsSuite.scala | 91 ++++++++++++++++++++
 4 files changed, 174 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9a2448da/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 4203034..c074b7b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -76,7 +76,8 @@ trait CaseConversionExpression {
   type EvaluatedType = Any
 
   def convert(v: String): String
-  
+
+  override def foldable: Boolean = child.foldable
   def nullable: Boolean = child.nullable
   def dataType: DataType = StringType
 
@@ -142,6 +143,8 @@ case class RLike(left: Expression, right: Expression)
 case class Upper(child: Expression) extends UnaryExpression with 
CaseConversionExpression {
   
   override def convert(v: String): String = v.toUpperCase()
+
+  override def toString() = s"Upper($child)"
 }
 
 /**
@@ -150,4 +153,6 @@ case class Upper(child: Expression) extends UnaryExpression 
with CaseConversionE
 case class Lower(child: Expression) extends UnaryExpression with 
CaseConversionExpression {
   
   override def convert(v: String): String = v.toLowerCase()
+
+  override def toString() = s"Lower($child)"
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9a2448da/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 28d1aa2..25a347b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -36,7 +36,8 @@ object Optimizer extends RuleExecutor[LogicalPlan] {
       ConstantFolding,
       BooleanSimplification,
       SimplifyFilters,
-      SimplifyCasts) ::
+      SimplifyCasts,
+      SimplifyCaseConversionExpressions) ::
     Batch("Filter Pushdown", FixedPoint(100),
       CombineFilters,
       PushPredicateThroughProject,
@@ -132,18 +133,6 @@ object NullPropagation extends Rule[LogicalPlan] {
           case Literal(candidate, _) if candidate == v => true
           case _ => false
         })) => Literal(true, BooleanType)
-      case e: UnaryMinus => e.child match {
-        case Literal(null, _) => Literal(null, e.dataType)
-        case _ => e
-      }
-      case e: Cast => e.child match {
-        case Literal(null, _) => Literal(null, e.dataType)
-        case _ => e
-      }
-      case e: Not => e.child match {
-        case Literal(null, _) => Literal(null, e.dataType)
-        case _ => e
-      }
       // Put exceptional cases above if any
       case e: BinaryArithmetic => e.children match {
         case Literal(null, _) :: right :: Nil => Literal(null, e.dataType)
@@ -375,3 +364,18 @@ object CombineLimits extends Rule[LogicalPlan] {
       Limit(If(LessThan(ne, le), ne, le), grandChild)
   }
 }
+
+/**
+ * Removes the inner [[catalyst.expressions.CaseConversionExpression]] that 
are unnecessary because
+ * the inner conversion is overwritten by the outer one.
+ */
+object SimplifyCaseConversionExpressions extends Rule[LogicalPlan] {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case q: LogicalPlan => q transformExpressionsUp {
+      case Upper(Upper(child)) => Upper(child)
+      case Upper(Lower(child)) => Upper(child)
+      case Lower(Upper(child)) => Lower(child)
+      case Lower(Lower(child)) => Lower(child)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/9a2448da/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index 20dfba8..6efc0e2 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -21,7 +21,7 @@ import 
org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
-import org.apache.spark.sql.catalyst.types.{DoubleType, IntegerType}
+import org.apache.spark.sql.catalyst.types._
 
 // For implicit conversions
 import org.apache.spark.sql.catalyst.dsl.plans._
@@ -173,4 +173,63 @@ class ConstantFoldingSuite extends OptimizerTest {
 
     comparePlans(optimized, correctAnswer)
   }
+
+  test("Constant folding test: expressions have null literals") {
+    val originalQuery =
+      testRelation
+        .select(
+          IsNull(Literal(null)) as 'c1,
+          IsNotNull(Literal(null)) as 'c2,
+
+          GetItem(Literal(null, ArrayType(IntegerType)), 1) as 'c3,
+          GetItem(Literal(Seq(1), ArrayType(IntegerType)), Literal(null, 
IntegerType)) as 'c4,
+          GetField(
+            Literal(null, StructType(Seq(StructField("a", IntegerType, 
true)))),
+            "a") as 'c5,
+
+          UnaryMinus(Literal(null, IntegerType)) as 'c6,
+          Cast(Literal(null), IntegerType) as 'c7,
+          Not(Literal(null, BooleanType)) as 'c8,
+
+          Add(Literal(null, IntegerType), 1) as 'c9,
+          Add(1, Literal(null, IntegerType)) as 'c10,
+
+          Equals(Literal(null, IntegerType), 1) as 'c11,
+          Equals(1, Literal(null, IntegerType)) as 'c12,
+
+          Like(Literal(null, StringType), "abc") as 'c13,
+          Like("abc", Literal(null, StringType)) as 'c14,
+
+          Upper(Literal(null, StringType)) as 'c15)
+
+    val optimized = Optimize(originalQuery.analyze)
+
+    val correctAnswer =
+      testRelation
+        .select(
+          Literal(true) as 'c1,
+          Literal(false) as 'c2,
+
+          Literal(null, IntegerType) as 'c3,
+          Literal(null, IntegerType) as 'c4,
+          Literal(null, IntegerType) as 'c5,
+
+          Literal(null, IntegerType) as 'c6,
+          Literal(null, IntegerType) as 'c7,
+          Literal(null, BooleanType) as 'c8,
+
+          Literal(null, IntegerType) as 'c9,
+          Literal(null, IntegerType) as 'c10,
+
+          Literal(null, BooleanType) as 'c11,
+          Literal(null, BooleanType) as 'c12,
+
+          Literal(null, BooleanType) as 'c13,
+          Literal(null, BooleanType) as 'c14,
+
+          Literal(null, StringType) as 'c15)
+        .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9a2448da/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala
new file mode 100644
index 0000000..df1409f
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+
+/* Implicit conversions */
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+
+class SimplifyCaseConversionExpressionsSuite extends OptimizerTest {
+
+  object Optimize extends RuleExecutor[LogicalPlan] {
+    val batches =
+      Batch("Simplify CaseConversionExpressions", Once,
+        SimplifyCaseConversionExpressions) :: Nil
+  }
+
+  val testRelation = LocalRelation('a.string)
+
+  test("simplify UPPER(UPPER(str))") {
+    val originalQuery =
+      testRelation
+        .select(Upper(Upper('a)) as 'u)
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer =
+      testRelation
+        .select(Upper('a) as 'u)
+        .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify UPPER(LOWER(str))") {
+    val originalQuery =
+      testRelation
+        .select(Upper(Lower('a)) as 'u)
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer =
+      testRelation
+        .select(Upper('a) as 'u)
+        .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify LOWER(UPPER(str))") {
+    val originalQuery =
+      testRelation
+        .select(Lower(Upper('a)) as 'l)
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .select(Lower('a) as 'l)
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify LOWER(LOWER(str))") {
+    val originalQuery =
+      testRelation
+        .select(Lower(Lower('a)) as 'l)
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .select(Lower('a) as 'l)
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+}

Reply via email to