This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d7b97a1 [SPARK-31166][SQL] UNION map<null, null> and other maps
should not fail
d7b97a1 is described below
commit d7b97a1d0daf65710317321490a833f696a46f21
Author: Wenchen Fan <[email protected]>
AuthorDate: Tue Mar 17 12:01:29 2020 +0800
[SPARK-31166][SQL] UNION map<null, null> and other maps should not fail
### What changes were proposed in this pull request?
After https://github.com/apache/spark/pull/27542, `map()` returns
`map<null, null>` instead of `map<string, string>`. However, this breaks
queries which union `map()` and other maps.
The reason is, `TypeCoercion` rules and `Cast` think it's illegal to cast
null type map key to other types, as it makes the key nullable, but it's
actually legal. This PR fixes it.
### Why are the changes needed?
To avoid breaking queries.
### Does this PR introduce any user-facing change?
Yes, now some queries that work in 2.x can work in 3.0 as well.
### How was this patch tested?
new test
Closes #27926 from cloud-fan/bug.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +-
.../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 8 ++++++--
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 6 ++++++
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 0a0bef6..e149bf2 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -158,7 +158,7 @@ object TypeCoercion {
}
case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2,
valueContainsNull2)) =>
findTypeFunc(kt1, kt2)
- .filter { kt => !Cast.forceNullable(kt1, kt) &&
!Cast.forceNullable(kt2, kt) }
+ .filter { kt => Cast.canCastMapKeyNullSafe(kt1, kt) &&
Cast.canCastMapKeyNullSafe(kt2, kt) }
.flatMap { kt =>
findTypeFunc(vt1, vt2).map { vt =>
MapType(kt, vt, valueContainsNull1 || valueContainsNull2 ||
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 7c4316f..8177136 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -77,8 +77,7 @@ object Cast {
resolvableNullability(fn || forceNullable(fromType, toType), tn)
case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) =>
- canCast(fromKey, toKey) &&
- (!forceNullable(fromKey, toKey)) &&
+ canCast(fromKey, toKey) && canCastMapKeyNullSafe(fromKey, toKey) &&
canCast(fromValue, toValue) &&
resolvableNullability(fn || forceNullable(fromValue, toValue), tn)
@@ -98,6 +97,11 @@ object Cast {
case _ => false
}
+ def canCastMapKeyNullSafe(fromType: DataType, toType: DataType): Boolean = {
+ // If the original map key type is NullType, it's OK as the map must be
empty.
+ fromType == NullType || !forceNullable(fromType, toType)
+ }
+
/**
* Return true if we need to use the `timeZone` information casting `from`
type to `to` type.
* The patterns matched reflect the current implementation in the Cast node.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 823e432..de0f780 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3487,6 +3487,12 @@ class SQLQuerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
}
}
}
+
+ test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
+ checkAnswer(
+ sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),
+ Seq(Row(Map[Int, Int]()), Row(Map(1 -> 2))))
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]