This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 51504e45271 [SPARK-42692][CONNECT] Implement `Dataset.toJSON`
51504e45271 is described below
commit 51504e4527195f63f33294752f092111d01e5a46
Author: yangjie01 <[email protected]>
AuthorDate: Tue Mar 7 15:46:35 2023 -0400
[SPARK-42692][CONNECT] Implement `Dataset.toJSON`
### What changes were proposed in this pull request?
This PR implements `Dataset.toJSON` for the Spark Connect JVM client.
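The new implementation (excerpted from the diff below) routes `toJSON` through the existing `to_json` and `struct` functions rather than a dedicated plan node:

```scala
def toJSON: Dataset[String] = {
  // Pack all columns into a single struct, serialize it with to_json,
  // and decode the resulting string column via StringEncoder.
  select(to_json(struct(col("*")))).as(StringEncoder)
}
```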
### Why are the changes needed?
It improves API coverage of the Spark Connect JVM client.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- Added a new test.
- Manually verified with Scala 2.13.
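For illustration, a hypothetical usage sketch, not taken from this patch; it assumes an existing Spark Connect session `spark`:

```scala
// Hypothetical example; `spark` is an existing Spark Connect session.
val df = spark.range(3).selectExpr("id", "id * 0.1 AS b")
val json: Array[String] = df.toJSON.collect()
json.foreach(println) // one JSON string per row, e.g. {"id":0,"b":0.0}
```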
Closes #40319 from LuciferYang/SPARK-42692.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
---
.../main/scala/org/apache/spark/sql/Dataset.scala | 3 ++-
.../org/apache/spark/sql/ClientE2ETestSuite.scala | 13 +++++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++
.../query-tests/explain-results/toJSON.explain | 2 ++
.../test/resources/query-tests/queries/toJSON.json | 29 +++++++++++++++++++++
.../resources/query-tests/queries/toJSON.proto.bin | Bin 0 -> 185 bytes
6 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
index 13dff3a874f..ff37614e87d 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{PrimitiveLongEnc
import org.apache.spark.sql.catalyst.expressions.RowOrdering
import org.apache.spark.sql.connect.client.SparkResult
import org.apache.spark.sql.connect.common.DataTypeProtoConverter
+import org.apache.spark.sql.functions.{struct, to_json}
import org.apache.spark.sql.types.{Metadata, StructType}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.Utils
@@ -2777,7 +2778,7 @@ class Dataset[T] private[sql] (
}
def toJSON: Dataset[String] = {
- throw new UnsupportedOperationException("toJSON is not implemented.")
+ select(to_json(struct(col("*")))).as(StringEncoder)
}
private[sql] def analyze: proto.AnalyzePlanResponse = {
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index abc182c1b8d..780280144b5 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -631,6 +631,19 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper {
val otherPlan = spark.sql("select 1")
assert(plan.sameSemantics(otherPlan))
}
+
+ test("toJSON") {
+ val expected = Array(
+ """{"b":0.0,"id":0,"d":"world","a":0}""",
+ """{"b":0.1,"id":1,"d":"world","a":1}""",
+ """{"b":0.2,"id":2,"d":"world","a":0}""")
+ val result = spark
+ .range(3)
+ .select(generateMyTypeColumns: _*)
+ .toJSON
+ .collect()
+ assert(result sameElements expected)
+ }
}
private[sql] case class MyType(id: Long, a: Double, b: Double)
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 85523a22d2b..027b7a30246 100755
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -257,6 +257,10 @@ class PlanGenerationTestSuite
session.read.json(testDataPath.resolve("people.json").toString)
}
+ test("toJSON") {
+ complex.toJSON
+ }
+
test("read csv") {
session.read.csv(testDataPath.resolve("people.csv").toString)
}
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
new file mode 100644
index 00000000000..1698c562732
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
@@ -0,0 +1,2 @@
+Project [to_json(struct(id, id#0L, a, a#0, b, b#0, d, d#0, e, e#0, f, f#0, g, g#0), Some(America/Los_Angeles)) AS to_json(struct(id, a, b, d, e, f, g))#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json
new file mode 100644
index 00000000000..278767e620a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema":
"struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "to_json",
+ "arguments": [{
+ "unresolvedFunction": {
+ "functionName": "struct",
+ "arguments": [{
+ "unresolvedStar": {
+ }
+ }]
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin
new file mode 100644
index 00000000000..e08d0fd2180
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin differ