spark git commit: [SPARK-12841][SQL][BRANCH-1.6] fix cast in filter

yhuai Mon, 18 Jan 2016 21:20:42 -0800

Repository: spark
Updated Branches:
  refs/heads/branch-1.6 d43704d7f -> 68265ac23



[SPARK-12841][SQL][BRANCH-1.6] fix cast in filter

In SPARK-10743 we wrapped `Cast` with `UnresolvedAlias` so that it could be
given a better alias where possible. However, in cases such as filter, the
`UnresolvedAlias` can't be resolved, and a better alias isn't needed there
anyway. This PR moves the cast-wrapping logic to `Column.named` so that it is
only applied when an alias name is actually needed.

Backport of https://github.com/apache/spark/pull/10781 to branch-1.6.

Author: Wenchen Fan <[email protected]>

Closes #10819 from cloud-fan/bug.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68265ac2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68265ac2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68265ac2

Branch: refs/heads/branch-1.6
Commit: 68265ac23e20305474daef14bbcf874308ca8f5a
Parents: d43704d
Author: Wenchen Fan <[email protected]>
Authored: Mon Jan 18 21:20:19 2016 -0800
Committer: Yin Huai <[email protected]>
Committed: Mon Jan 18 21:20:19 2016 -0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  2 +-
 .../main/scala/org/apache/spark/sql/Column.scala   | 17 ++++++++++-------
 .../org/apache/spark/sql/DataFrameSuite.scala      |  7 +++++++
 3 files changed, 18 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/68265ac2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ca00a5e..6a805f5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -148,7 +148,7 @@ class Analyzer(
     private def assignAliases(exprs: Seq[NamedExpression]) = {
       exprs.zipWithIndex.map {
         case (expr, i) =>
-          expr transform {
+          expr transformUp {
             case u @ UnresolvedAlias(child) => child match {
               case ne: NamedExpression => ne
               case e if !e.resolved => u

http://git-wip-us.apache.org/repos/asf/spark/blob/68265ac2/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 297ef22..10503f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -132,6 +132,15 @@ class Column(protected[sql] val expr: Expression) extends 
Logging {
     case explode: Explode => MultiAlias(explode, Nil)
     case jt: JsonTuple => MultiAlias(jt, Nil)
 
+    // If we have a top level Cast, there is a chance to give it a better 
alias, if there is a
+    // NamedExpression under this Cast.
+    case c: Cast => c.transformUp {
+      case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to))
+    } match {
+      case ne: NamedExpression => ne
+      case other => Alias(expr, expr.prettyString)()
+    }
+
     case expr: Expression => Alias(expr, expr.prettyString)()
   }
 
@@ -931,13 +940,7 @@ class Column(protected[sql] val expr: Expression) extends 
Logging {
    * @group expr_ops
    * @since 1.3.0
    */
-  def cast(to: DataType): Column = withExpr {
-    expr match {
-      // keeps the name of expression if possible when do cast.
-      case ne: NamedExpression => UnresolvedAlias(Cast(expr, to))
-      case _ => Cast(expr, to)
-    }
-  }
+  def cast(to: DataType): Column = withExpr { Cast(expr, to) }
 
   /**
    * Casts the column to a different data type, using the canonical string 
representation

http://git-wip-us.apache.org/repos/asf/spark/blob/68265ac2/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 0b7573c..f2ac560 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -991,6 +991,7 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
   test("SPARK-10743: keep the name of expression if possible when do cast") {
     val df = (1 to 10).map(Tuple1.apply).toDF("i").as("src")
     assert(df.select($"src.i".cast(StringType)).columns.head === "i")
+    assert(df.select($"src.i".cast(StringType).cast(IntegerType)).columns.head 
=== "i")
   }
 
   test("SPARK-11301: fix case sensitivity for filter on partitioned columns") {
@@ -1163,4 +1164,10 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
     val primitiveUDF = udf((i: Int) => i * 2)
     checkAnswer(df.select(primitiveUDF($"age")), Row(44) :: Row(null) :: Nil)
   }
+
+  test("SPARK-12841: cast in filter") {
+    checkAnswer(
+      Seq(1 -> "a").toDF("i", "j").filter($"i".cast(StringType) === "1"),
+      Row(1, "a"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-12841][SQL][BRANCH-1.6] fix cast in filter

Reply via email to