Repository: spark Updated Branches: refs/heads/branch-2.2 3cefddee5 -> 3ae187b96
[SPARK-22469][SQL] Accuracy problem in comparison with string and numeric This fixes a problem caused by #15880: `select '1.5' > 0.5;` returns NULL in Spark but true in Hive. When comparing a string with a numeric value, cast both to double, as Hive does. Author: liutang123 <[email protected]> Closes #19692 from liutang123/SPARK-22469. (cherry picked from commit bc0848b4c1ab84ccef047363a70fd11df240dbbf) Signed-off-by: Wenchen Fan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ae187b9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ae187b9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ae187b9 Branch: refs/heads/branch-2.2 Commit: 3ae187b963b6cec8d92080dd7c2fd098b0ff2882 Parents: 3cefdde Author: liutang123 <[email protected]> Authored: Wed Nov 15 09:02:54 2017 -0800 Committer: Wenchen Fan <[email protected]> Committed: Thu Nov 16 12:47:22 2017 +0100 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 7 +++++++ .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala | 3 +++ .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 4 ++++ 3 files changed, 14 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index e1dd010..4772ab1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -125,6 +125,13 @@ object 
TypeCoercion { case (DateType, TimestampType) => Some(StringType) case (StringType, NullType) => Some(StringType) case (NullType, StringType) => Some(StringType) + + // There is no proper decimal type we can pick, + // using double type is the best we can do. + // See SPARK-22469 for details. + case (n: DecimalType, s: StringType) => Some(DoubleType) + case (s: StringType, n: DecimalType) => Some(DoubleType) + case (l: StringType, r: AtomicType) if r != StringType => Some(r) case (l: AtomicType, r: StringType) if (l != StringType) => Some(l) case (l, r) => None http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 2624f558..06514ad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -997,6 +997,9 @@ class TypeCoercionSuite extends PlanTest { ruleTest(PromoteStrings, EqualTo(Literal(Array(1, 2)), Literal("123")), EqualTo(Literal(Array(1, 2)), Literal("123"))) + ruleTest(PromoteStrings, + GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))), + GreaterThan(Cast(Literal("1.5"), DoubleType), Cast(Literal(BigDecimal("0.5")), DoubleType))) } } http://git-wip-us.apache.org/repos/asf/spark/blob/3ae187b9/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 0a9aafb..7450a1a 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1785,4 +1785,8 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { val mean = result.select("DecimalCol").where($"summary" === "mean") assert(mean.collect().toSet === Set(Row("0.0345678900000000000000000000000000000"))) } + + test("SPARK-22469: compare string with decimal") { + checkAnswer(Seq("1.5").toDF("s").filter("s > 0.5"), Row("1.5")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
