Repository: spark
Updated Branches:
  refs/heads/branch-1.6 47c2c8ce3 -> 2ecc0f243


[SPARK-12028] [SQL] get_json_object returns an incorrect result when the value 
is a null literal

When calling `get_json_object` in the following two cases, both results are 
the string `"null"`, even though the first case holds a JSON null literal 
rather than the string `"null"`:

```scala
    val tuple: Seq[(String, String)] = ("5", """{"f1": null}""") :: Nil
    val df: DataFrame = tuple.toDF("key", "jstring")
    val res = df.select(functions.get_json_object($"jstring", "$.f1")).collect()
```
```scala
    val tuple2: Seq[(String, String)] = ("5", """{"f1": "null"}""") :: Nil
    val df2: DataFrame = tuple2.toDF("key", "jstring")
    val res3 = df2.select(functions.get_json_object($"jstring", "$.f1")).collect()
```

Fixed the problem and also added a test case.

Author: gatorsmile <[email protected]>

Closes #10018 from gatorsmile/get_json_object.

(cherry picked from commit 149cd692ee2e127d79386fd8e584f4f70a2906ba)
Signed-off-by: Davies Liu <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ecc0f24
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ecc0f24
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ecc0f24

Branch: refs/heads/branch-1.6
Commit: 2ecc0f2434336ab23c76d19d7543efc1c2b6e412
Parents: 47c2c8c
Author: gatorsmile <[email protected]>
Authored: Fri Nov 27 22:44:08 2015 -0800
Committer: Davies Liu <[email protected]>
Committed: Fri Nov 27 22:44:18 2015 -0800

----------------------------------------------------------------------
 .../catalyst/expressions/jsonExpressions.scala  |  7 +++++--
 .../apache/spark/sql/JsonFunctionsSuite.scala   | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2ecc0f24/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 8cd7323..4991b9c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -298,8 +298,11 @@ case class GetJsonObject(json: Expression, path: Expression)
 
       case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name =>
         // exact field match
-        p.nextToken()
-        evaluatePath(p, g, style, xs)
+        if (p.nextToken() != JsonToken.VALUE_NULL) {
+          evaluatePath(p, g, style, xs)
+        } else {
+          false
+        }
 
       case (FIELD_NAME, Wildcard :: xs) =>
         // wildcard field match

http://git-wip-us.apache.org/repos/asf/spark/blob/2ecc0f24/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 14fd56f..1f384ed 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -39,6 +39,26 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
     ("6", "[invalid JSON string]") ::
     Nil
 
+  test("function get_json_object - null") {
+    val df: DataFrame = tuples.toDF("key", "jstring")
+    val expected =
+      Row("1", "value1", "value2", "3", null, "5.23") ::
+        Row("2", "value12", "2", "value3", "4.01", null) ::
+        Row("3", "value13", "2", "value33", "value44", "5.01") ::
+        Row("4", null, null, null, null, null) ::
+        Row("5", "", null, null, null, null) ::
+        Row("6", null, null, null, null, null) ::
+        Nil
+
+    checkAnswer(
+      df.select($"key", functions.get_json_object($"jstring", "$.f1"),
+        functions.get_json_object($"jstring", "$.f2"),
+        functions.get_json_object($"jstring", "$.f3"),
+        functions.get_json_object($"jstring", "$.f4"),
+        functions.get_json_object($"jstring", "$.f5")),
+      expected)
+  }
+
   test("json_tuple select") {
     val df: DataFrame = tuples.toDF("key", "jstring")
     val expected =


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to