Repository: spark
Updated Branches:
  refs/heads/branch-2.2 987682160 -> 182478e03


[SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type

## What changes were proposed in this pull request?

`JacksonUtils.verifySchema` verifies whether a data type can be converted to JSON. 
For `MapType`, it currently verifies the key type. However, when `JacksonGenerator` 
converts a map to JSON, only the values matter: a writer is created for the value 
type, and the keys are rendered as strings by calling `toString` on them.

Thus, `JacksonUtils.verifySchema` should verify the value type of `MapType` instead.
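
As a concrete illustration, here is a rough spark-shell sketch of the intended behavior. It assumes a `SparkSession` named `spark` and mirrors the tests added in this patch; the variable names are illustrative only:

```scala
import org.apache.spark.sql.functions.{lit, map, struct, to_json}
import org.apache.spark.sql.types.CalendarIntervalType
import spark.implicits._

// Key is an interval: keys are rendered via `toString`, so to_json succeeds
// even though CalendarIntervalType itself has no JSON representation.
val keyed = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
  .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c"))
keyed.select(to_json($"c")).show(false)
// {"col1":{"interval -3 months 7 hours":"a"}}

// Value is an interval: there is no JSON representation for it, so
// verifySchema should reject the query with an AnalysisException.
val valued = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
  .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
valued.select(to_json($"c")).collect()
// org.apache.spark.sql.AnalysisException:
//   Unable to convert column col1 of type calendarinterval to JSON.
```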

## How was this patch tested?

Added tests.

Author: Liang-Chi Hsieh <vii...@gmail.com>

Closes #19167 from viirya/test-jacksonutils.

(cherry picked from commit 6b45d7e941eba8a36be26116787322d9e3ae25d0)
Signed-off-by: hyukjinkwon <gurwls...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/182478e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/182478e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/182478e0

Branch: refs/heads/branch-2.2
Commit: 182478e030688b602bf95edfd82f700d6f5678d1
Parents: 9876821
Author: Liang-Chi Hsieh <vii...@gmail.com>
Authored: Sat Sep 9 19:10:52 2017 +0900
Committer: hyukjinkwon <gurwls...@gmail.com>
Committed: Sat Sep 9 19:11:28 2017 +0900

----------------------------------------------------------------------
 .../spark/sql/catalyst/json/JacksonUtils.scala  |  4 +++-
 .../expressions/JsonExpressionsSuite.scala      | 23 +++++++++++++++++++
 .../apache/spark/sql/JsonFunctionsSuite.scala   | 24 +++++++++++++++++---
 3 files changed, 47 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/182478e0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
index 3b23c6c..134d16e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
@@ -44,7 +44,9 @@ object JacksonUtils {
 
       case at: ArrayType => verifyType(name, at.elementType)
 
-      case mt: MapType => verifyType(name, mt.keyType)
+      // For MapType, keys are treated as strings (i.e. by calling `toString`) when
+      // generating JSON, so we only care whether the values are valid for JSON.
+      case mt: MapType => verifyType(name, mt.valueType)
 
       case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)
 
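
For context, the enclosing method has roughly the following shape after this change. This is a sketch reconstructed from the hunk above and the error message asserted in the tests below; the exact list of atomic types is an assumption, not the verbatim branch-2.2 source:

```scala
import org.apache.spark.sql.types._

// Sketch of JacksonUtils.verifyType (assumed shape): recurse into container
// types and throw for anything JacksonGenerator cannot write.
def verifyType(name: String, dataType: DataType): Unit = dataType match {
  // Atomic types writable by JacksonGenerator need no further check.
  case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType |
       FloatType | DoubleType | StringType | TimestampType | DateType |
       BinaryType | _: DecimalType =>

  case st: StructType => st.foreach(field => verifyType(field.name, field.dataType))

  case at: ArrayType => verifyType(name, at.elementType)

  // Keys are rendered with `toString`, so only the value type is checked.
  case mt: MapType => verifyType(name, mt.valueType)

  case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)

  case _ =>
    throw new UnsupportedOperationException(
      s"Unable to convert column $name of type ${dataType.simpleString} to JSON.")
}
```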

http://git-wip-us.apache.org/repos/asf/spark/blob/182478e0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index f892e80..53b54de 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -21,6 +21,7 @@ import java.util.Calendar
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -590,4 +591,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       """{"t":"2015-12-31T16:00:00"}"""
     )
   }
+
+  test("to_json: verify MapType's value type instead of key type") {
+    // Keys in a map are treated as strings when converting to JSON. The type doesn't matter at all.
+    val mapType1 = MapType(CalendarIntervalType, IntegerType)
+    val schema1 = StructType(StructField("a", mapType1) :: Nil)
+    val struct1 = Literal.create(null, schema1)
+    checkEvaluation(
+      StructsToJson(Map.empty, struct1, gmtId),
+      null
+    )
+
+    // The value type must be valid for converting to JSON.
+    val mapType2 = MapType(IntegerType, CalendarIntervalType)
+    val schema2 = StructType(StructField("a", mapType2) :: Nil)
+    val struct2 = Literal.create(null, schema2)
+    intercept[TreeNodeException[_]] {
+      checkEvaluation(
+        StructsToJson(Map.empty, struct2, gmtId),
+        null
+      )
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/182478e0/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 69a500c..989f8c2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.functions.{from_json, struct, to_json}
+import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
@@ -188,15 +188,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
   }
 
-  test("to_json unsupported type") {
+  test("to_json - key types of map don't matter") {
+    // interval type is invalid for converting to JSON. However, the keys of a map are treated
+    // as strings, so their type doesn't matter.
     val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
-      .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
+      .select(struct(map($"a._1".cast(CalendarIntervalType), 
lit("a")).as("col1")).as("c"))
+    checkAnswer(
+      df.select(to_json($"c")),
+      Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil)
+  }
+
+  test("to_json unsupported type") {
+    val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
+    val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
     val e = intercept[AnalysisException]{
       // Unsupported type throws an exception
       df.select(to_json($"c")).collect()
     }
     assert(e.getMessage.contains(
       "Unable to convert column a of type calendarinterval to JSON."))
+
+    // interval type is invalid for converting to JSON. We can't use it as the value type of a map.
+    val df2 = baseDf
+      .select(struct(map(lit("a"), 
$"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
+    val e2 = intercept[AnalysisException] {
+      df2.select(to_json($"c")).collect()
+    }
+    assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON"))
   }
 
   test("roundtrip in to_json and from_json - struct") {

