This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new c47fc2d8265 [SPARK-42692][CONNECT] Implement `Dataset.toJSON` c47fc2d8265 is described below commit c47fc2d82656e569d3ef84a090ea3d2cb25f70eb Author: yangjie01 <yangji...@baidu.com> AuthorDate: Tue Mar 7 15:46:35 2023 -0400 [SPARK-42692][CONNECT] Implement `Dataset.toJSON` ### What changes were proposed in this pull request? This pr aims to implement Dataset.toJSON. ### Why are the changes needed? Add Spark connect jvm client api coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Add new test - Manually checked Scala 2.13 Closes #40319 from LuciferYang/SPARK-42692. Authored-by: yangjie01 <yangji...@baidu.com> Signed-off-by: Herman van Hovell <her...@databricks.com> (cherry picked from commit 51504e4527195f63f33294752f092111d01e5a46) Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../main/scala/org/apache/spark/sql/Dataset.scala | 3 ++- .../org/apache/spark/sql/ClientE2ETestSuite.scala | 13 +++++++++ .../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++ .../query-tests/explain-results/toJSON.explain | 2 ++ .../test/resources/query-tests/queries/toJSON.json | 29 +++++++++++++++++++++ .../resources/query-tests/queries/toJSON.proto.bin | Bin 0 -> 185 bytes 6 files changed, 50 insertions(+), 1 deletion(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index 13dff3a874f..ff37614e87d 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{PrimitiveLongEnc import org.apache.spark.sql.catalyst.expressions.RowOrdering import org.apache.spark.sql.connect.client.SparkResult import org.apache.spark.sql.connect.common.DataTypeProtoConverter +import org.apache.spark.sql.functions.{struct, to_json} import org.apache.spark.sql.types.{Metadata, StructType} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -2777,7 +2778,7 @@ class Dataset[T] private[sql] ( } def toJSON: Dataset[String] = { - throw new UnsupportedOperationException("toJSON is not implemented.") + select(to_json(struct(col("*")))).as(StringEncoder) } private[sql] def analyze: proto.AnalyzePlanResponse = { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index abc182c1b8d..780280144b5 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -631,6 +631,19 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper { val otherPlan = spark.sql("select 1") assert(plan.sameSemantics(otherPlan)) } + + test("toJSON") { + val expected = Array( + """{"b":0.0,"id":0,"d":"world","a":0}""", + """{"b":0.1,"id":1,"d":"world","a":1}""", + """{"b":0.2,"id":2,"d":"world","a":0}""") + val result = spark + .range(3) + .select(generateMyTypeColumns: _*) + .toJSON + .collect() + assert(result sameElements expected) + } } private[sql] case class MyType(id: Long, a: Double, b: Double) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 85523a22d2b..027b7a30246 100755 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -257,6 +257,10 @@ class PlanGenerationTestSuite session.read.json(testDataPath.resolve("people.json").toString) } + test("toJSON") { + complex.toJSON + } + test("read csv") { session.read.csv(testDataPath.resolve("people.csv").toString) } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain new file mode 100644 index 00000000000..1698c562732 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain @@ -0,0 +1,2 @@ +Project [to_json(struct(id, id#0L, a, a#0, b, b#0, d, d#0, e, e#0, f, f#0, g, g#0), Some(America/Los_Angeles)) AS to_json(struct(id, a, b, d, e, f, g))#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json new file mode 100644 index 00000000000..278767e620a --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "to_json", + "arguments": [{ + "unresolvedFunction": { + "functionName": "struct", + "arguments": [{ + "unresolvedStar": { + } + }] + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin new file mode 100644 index 00000000000..e08d0fd2180 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin differ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org