This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 880a0cf0910f [SPARK-55089][CONNECT] Correct the output schema of toJSON
880a0cf0910f is described below
commit 880a0cf0910f042f2d4d7ea7cad46ff49c8fcad4
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jan 20 11:51:32 2026 +0900
[SPARK-55089][CONNECT] Correct the output schema of toJSON
### What changes were proposed in this pull request?
Correct the output schema of toJSON
### Why are the changes needed?
classic:
```scala
scala> import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions._
scala> val df = spark.range(10).withColumn("s", col("id").cast("string"))
val df: org.apache.spark.sql.DataFrame = [id: bigint, s: string]
scala> df.toJSON.show()
+----------------+
| value|
+----------------+
|{"id":0,"s":"0"}|
|{"id":1,"s":"1"}|
|{"id":2,"s":"2"}|
|{"id":3,"s":"3"}|
|{"id":4,"s":"4"}|
|{"id":5,"s":"5"}|
|{"id":6,"s":"6"}|
|{"id":7,"s":"7"}|
|{"id":8,"s":"8"}|
|{"id":9,"s":"9"}|
+----------------+
```
Connect (Scala client):
```scala
scala> df.toJSON.show()
+----------------------+
|to_json(struct(id, s))|
+----------------------+
| {"id":0,"s":"0"}|
| {"id":1,"s":"1"}|
| {"id":2,"s":"2"}|
| {"id":3,"s":"3"}|
| {"id":4,"s":"4"}|
| {"id":5,"s":"5"}|
| {"id":6,"s":"6"}|
| {"id":7,"s":"7"}|
| {"id":8,"s":"8"}|
| {"id":9,"s":"9"}|
+----------------------+
```
### Does this PR introduce _any_ user-facing change?
Yes, the output field name will be `value`, which is the same as in Spark Classic.
### How was this patch tested?
updated golden file
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #53850 from zhengruifeng/fix_toJSON_name.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../org/apache/spark/sql/connect/Dataset.scala | 2 +-
.../query-tests/explain-results/toJSON.explain | 2 +-
.../test/resources/query-tests/queries/toJSON.json | 38 ++++++++++++++++-----
.../resources/query-tests/queries/toJSON.proto.bin | Bin 745 -> 941 bytes
4 files changed, 32 insertions(+), 10 deletions(-)
diff --git
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
index f8cf30ccd4dd..e9595dc64e9f 100644
---
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
+++
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
@@ -1228,7 +1228,7 @@ class Dataset[T] private[sql] (
/** @inheritdoc */
def toJSON: Dataset[String] = {
- select(to_json(struct(col("*")))).as(StringEncoder)
+ select(to_json(struct(col("*"))).as("value")).as(StringEncoder)
}
private[sql] def analyze: proto.AnalyzePlanResponse = {
diff --git
a/sql/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
b/sql/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
index fcb3e173ecaa..6eade342fa7c 100644
---
a/sql/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
+++
b/sql/connect/common/src/test/resources/query-tests/explain-results/toJSON.explain
@@ -1,2 +1,2 @@
-Project
[invoke(StructsToJsonEvaluator(Map(),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true),StructField(d,StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),true),StructField(e,ArrayType(IntegerType,true),true),StructField(f,MapType(StringType,StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),true),true),StructField(g,String
[...]
+Project
[invoke(StructsToJsonEvaluator(Map(),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true),StructField(d,StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),true),StructField(e,ArrayType(IntegerType,true),true),StructField(f,MapType(StringType,StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),true),true),StructField(g,String
[...]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json
b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json
index 8ea8d319e974..9fb93ebb792b 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json
@@ -12,14 +12,36 @@
}
},
"expressions": [{
- "unresolvedFunction": {
- "functionName": "to_json",
- "arguments": [{
+ "alias": {
+ "expr": {
"unresolvedFunction": {
- "functionName": "struct",
+ "functionName": "to_json",
"arguments": [{
- "unresolvedStar": {
- "planId": "0"
+ "unresolvedFunction": {
+ "functionName": "struct",
+ "arguments": [{
+ "unresolvedStar": {
+ "planId": "0"
+ },
+ "common": {
+ "origin": {
+ "jvmOrigin": {
+ "stackTrace": [{
+ "classLoaderName": "app",
+ "declaringClass":
"org.apache.spark.sql.connect.Dataset",
+ "methodName": "toJSON",
+ "fileName": "Dataset.scala"
+ }, {
+ "classLoaderName": "app",
+ "declaringClass":
"org.apache.spark.sql.PlanGenerationTestSuite",
+ "methodName": "~~trimmed~anonfun~~",
+ "fileName": "PlanGenerationTestSuite.scala"
+ }]
+ }
+ }
+ }
+ }],
+ "isInternal": false
},
"common": {
"origin": {
@@ -58,8 +80,8 @@
}
}
}
- }],
- "isInternal": false
+ },
+ "name": ["value"]
},
"common": {
"origin": {
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin
index 0ad2a18105b0..03b201acf794 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin
differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]