This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new f553c9993e4 [SPARK-42782][SQL][TESTS] Hive compatibility check for get_json_object
f553c9993e4 is described below

commit f553c9993e4f09f6756240e2aecb5c54be417861
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Tue Mar 14 09:04:15 2023 -0700

    [SPARK-42782][SQL][TESTS] Hive compatibility check for get_json_object

    ### What changes were proposed in this pull request?

    This PR ports the [tests](https://github.com/apache/hive/blob/rel/release-3.1.3/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java) for `get_json_object` from the Apache Hive project.

    ### Why are the changes needed?

    Increase test coverage.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    N/A.

    Closes #40409 from wangyum/SPARK-42782.

    Authored-by: Yuming Wang <yumw...@ebay.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 84 ++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index f2e0fd57738..4c07c883c84 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -81,6 +81,90 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
       expected)
   }
 
+  test("SPARK-42782: Hive compatibility check for get_json_object") {
+    val book0 = "{\"author\":\"Nigel Rees\",\"title\":\"Sayings of the Century\"" +
+      ",\"category\":\"reference\",\"price\":8.95}"
+    val backet0 = "[1,2,{\"b\":\"y\",\"a\":\"x\"}]"
+    val backet = "[" + backet0 + ",[3,4],[5,6]]"
+    val backetFlat = backet0.substring(0, backet0.length() - 1) + ",3,4,5,6]"
+
+    val book = "[" + book0 + ",{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\"," +
+      "\"category\":\"fiction\",\"price\":8.99" +
+      ",\"isbn\":\"0-553-21311-3\"},{\"author\":\"J. R. R. Tolkien\"" +
+      ",\"title\":\"The Lord of the Rings\",\"category\":\"fiction\"" +
+      ",\"reader\":[{\"age\":25,\"name\":\"bob\"},{\"age\":26,\"name\":\"jack\"}]" +
+      ",\"price\":22.99,\"isbn\":\"0-395-19395-8\"}]"
+
+    val json = "{\"store\":{\"fruit\":[{\"weight\":8,\"type\":\"apple\"}," +
+      "{\"weight\":9,\"type\":\"pear\"}],\"basket\":" + backet + ",\"book\":" + book +
+      ",\"bicycle\":{\"price\":19.95,\"color\":\"red\"}}" +
+      ",\"email\":\"amy@only_for_json_udf_test.net\"" +
+      ",\"owner\":\"amy\",\"zip code\":\"94025\",\"fb:testid\":\"1234\"}"
+
+    // Basic test
+    runTest(json, "$.owner", "amy")
+    runTest(json, "$.store.bicycle", "{\"price\":19.95,\"color\":\"red\"}")
+    runTest(json, "$.store.book", book)
+    runTest(json, "$.store.book[0]", book0)
+    runTest(json, "$.store.book[*]", book)
+    runTest(json, "$.store.book[0].category", "reference")
+    runTest(json, "$.store.book[*].category", "[\"reference\",\"fiction\",\"fiction\"]")
+    runTest(json, "$.store.book[*].reader[0].age", "25")
+    runTest(json, "$.store.book[*].reader[*].age", "[25,26]")
+    runTest(json, "$.store.basket[0][1]", "2")
+    runTest(json, "$.store.basket[*]", backet)
+    runTest(json, "$.store.basket[*][0]", "[1,3,5]")
+    runTest(json, "$.store.basket[0][*]", backet0)
+    runTest(json, "$.store.basket[*][*]", backetFlat)
+    runTest(json, "$.store.basket[0][2].b", "y")
+    runTest(json, "$.store.basket[0][*].b", "[\"y\"]")
+    runTest(json, "$.non_exist_key", null)
+    runTest(json, "$.store.book[10]", null)
+    runTest(json, "$.store.book[0].non_exist_key", null)
+    runTest(json, "$.store.basket[*].non_exist_key", null)
+    runTest(json, "$.store.basket[0][*].non_exist_key", null)
+    runTest(json, "$.store.basket[*][*].non_exist_key", null)
+    runTest(json, "$.zip code", "94025")
+    runTest(json, "$.fb:testid", "1234")
+    runTest("{\"a\":\"b\nc\"}", "$.a", "b\nc")
+
+    // Test root array
+    runTest("[1,2,3]", "$[0]", "1")
+    runTest("[1,2,3]", "$.[0]", null) // Not supported
+    runTest("[1,2,3]", "$.[1]", null) // Not supported
+    runTest("[1,2,3]", "$[1]", "2")
+
+    runTest("[1,2,3]", "$[3]", null)
+    runTest("[1,2,3]", "$.[*]", null) // Not supported
+    runTest("[1,2,3]", "$[*]", "[1,2,3]")
+    runTest("[1,2,3]", "$", "[1,2,3]")
+    runTest("[{\"k1\":\"v1\"},{\"k2\":\"v2\"},{\"k3\":\"v3\"}]", "$[2]", "{\"k3\":\"v3\"}")
+    runTest("[{\"k1\":\"v1\"},{\"k2\":\"v2\"},{\"k3\":\"v3\"}]", "$[2].k3", "v3")
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0].k11[1]", "2")
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0].k11", "[1,2,3]")
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0]", "{\"k11\":[1,2,3]}")
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1", "[{\"k11\":[1,2,3]}]")
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0]", "{\"k1\":[{\"k11\":[1,2,3]}]}")
+    runTest("[[1,2,3],[4,5,6],[7,8,9]]", "$[1]", "[4,5,6]")
+    runTest("[[1,2,3],[4,5,6],[7,8,9]]", "$[1][0]", "4")
+    runTest("[\"a\",\"b\"]", "$[1]", "b")
+    runTest("[[\"a\",\"b\"]]", "$[0][1]", "b")
+
+    runTest("[1,2,3]", "[0]", null)
+    runTest("[1,2,3]", "$0", null)
+    runTest("[1,2,3]", "0", null)
+    runTest("[1,2,3]", "$.", null)
+
+    runTest("[1,2,3]", "$", "[1,2,3]")
+    runTest("{\"a\":4}", "$", "{\"a\":4}")
+
+    def runTest(json: String, path: String, exp: String): Unit = {
+      checkAnswer(
+        Seq(json).toDF().selectExpr(s"get_json_object(value, '$path')"),
+        Row(exp))
+    }
+  }
+
   test("json_tuple select") {
     val df: DataFrame = tuples.toDF("key", "jstring")
     val expected =


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org