Repository: spark
Updated Branches:
  refs/heads/branch-2.2 3cefddee5 -> 3ae187b96


[SPARK-22469][SQL] Accuracy problem in comparison with string and numeric

This fixes a problem caused by #15880:
`select '1.5' > 0.5; // Result is NULL in Spark but is true in Hive.`
When comparing a string with a numeric value, cast both to double, as Hive does.

Author: liutang123 <[email protected]>

Closes #19692 from liutang123/SPARK-22469.

(cherry picked from commit bc0848b4c1ab84ccef047363a70fd11df240dbbf)
Signed-off-by: Wenchen Fan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ae187b9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ae187b9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ae187b9

Branch: refs/heads/branch-2.2
Commit: 3ae187b963b6cec8d92080dd7c2fd098b0ff2882
Parents: 3cefdde
Author: liutang123 <[email protected]>
Authored: Wed Nov 15 09:02:54 2017 -0800
Committer: Wenchen Fan <[email protected]>
Committed: Thu Nov 16 12:47:22 2017 +0100

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 7 +++++++
 .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala       | 3 +++
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala  | 4 ++++
 3 files changed, 14 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index e1dd010..4772ab1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -125,6 +125,13 @@ object TypeCoercion {
     case (DateType, TimestampType) => Some(StringType)
     case (StringType, NullType) => Some(StringType)
     case (NullType, StringType) => Some(StringType)
+
+    // There is no proper decimal type we can pick,
+    // using double type is the best we can do.
+    // See SPARK-22469 for details.
+    case (n: DecimalType, s: StringType) => Some(DoubleType)
+    case (s: StringType, n: DecimalType) => Some(DoubleType)
+
     case (l: StringType, r: AtomicType) if r != StringType => Some(r)
     case (l: AtomicType, r: StringType) if (l != StringType) => Some(l)
     case (l, r) => None

http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 2624f558..06514ad 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -997,6 +997,9 @@ class TypeCoercionSuite extends PlanTest {
     ruleTest(PromoteStrings,
       EqualTo(Literal(Array(1, 2)), Literal("123")),
       EqualTo(Literal(Array(1, 2)), Literal("123")))
+    ruleTest(PromoteStrings,
+      GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))),
+      GreaterThan(Cast(Literal("1.5"), DoubleType), 
Cast(Literal(BigDecimal("0.5")), DoubleType)))
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 0a9aafb..7450a1a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1785,4 +1785,8 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
     val mean = result.select("DecimalCol").where($"summary" === "mean")
     assert(mean.collect().toSet === 
Set(Row("0.0345678900000000000000000000000000000")))
   }
+
+  test("SPARK-22469: compare string with decimal") {
+    checkAnswer(Seq("1.5").toDF("s").filter("s > 0.5"), Row("1.5"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to