This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0340f805fdd2 [SPARK-47497][SQL][FOLLOWUP] Add a UT for `nested structure` for the function `to_csv`
0340f805fdd2 is described below
commit 0340f805fdd2399096ab91203f0cdb7c21600ae4
Author: panbingkun <[email protected]>
AuthorDate: Tue Mar 26 08:57:43 2024 +0800
[SPARK-47497][SQL][FOLLOWUP] Add a UT for `nested structure` for the function `to_csv`
### What changes were proposed in this pull request?
This PR aims to add a UT covering `nested structure` data for the function `to_csv`.
FollowUp: https://github.com/apache/spark/pull/45657
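As context (not part of the patch), here is a minimal sketch of the behavior the new UT exercises, assuming a running spark-shell so that a `SparkSession` named `spark` is available; the data shape and expected output are modeled on the test added in the diff below:
```scala
import java.util.Arrays

import org.apache.spark.sql.Row
import org.apache.spark.sql.functions.{col, to_csv}
import org.apache.spark.sql.types._

// Hypothetical data mirroring the shape covered by the new test:
// a struct column whose fields include an array of maps.
val rows = Arrays.asList(
  Row(Row("Alice", Array(Map("math" -> 100L, "english" -> 200L)))))
val schema = StructType(Seq(
  StructField("value", StructType(Seq(
    StructField("name", StringType),
    StructField("scores", ArrayType(MapType(StringType, LongType))))))))

val df = spark.createDataFrame(rows, schema)
// With SPARK-47497, the nested array/map is rendered as a pretty string,
// e.g. Alice,"[{math -> 100, english -> 200}]"
df.select(to_csv(col("value"))).show(truncate = false)
```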
### Why are the changes needed?
Add a UT to improve test coverage.
<img width="908" alt="image" src="https://github.com/apache/spark/assets/15246973/226e3df1-7b24-49e7-a2b6-c9cdcb66ad69">
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Manual tests (a sample local run command follows below).
- Passed GA.
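For a local run, the suite can typically be executed with sbt (command shown as an assumption about the usual Spark developer workflow, not something stated in this patch):
```
build/sbt "sql/testOnly org.apache.spark.sql.CsvFunctionsSuite"
```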
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #45692 from panbingkun/to_csv_ut.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/CsvFunctionsSuite.scala | 28 ++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 196a1fd38837..85e9ada0dce5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -768,4 +768,32 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
       context = ExpectedContext(fragment = "to_csv", getCurrentClassCallSitePattern)
     )
   }
+
+  test("SPARK-47497: to_csv support the data of nested structure as pretty strings") {
+    // The item of the Array is a Map
+    val rows = new java.util.ArrayList[Row]()
+    rows.add(Row(1L, Row(2L, "Alice",
+      Array(Map("math" -> 100L, "english" -> 200L, "science" -> null),
+        Map("math" -> 300L, "english" -> 400L, "science" -> 500L)))))
+
+    val valueSchema = StructType(Seq(
+      StructField("age", LongType),
+      StructField("name", StringType),
+      StructField("scores", ArrayType(MapType(StringType, LongType)))))
+    val schema = StructType(Seq(
+      StructField("key", LongType),
+      StructField("value", valueSchema)))
+
+    val df = spark.createDataFrame(rows, schema)
+    val actual1 = df.select(to_csv($"value"))
+    checkAnswer(actual1, Row("2,Alice," +
+      "\"[{math -> 100, english -> 200, science ->}, " +
+      "{math -> 300, english -> 400, science -> 500}]\""))
+
+    val options = Map("nullValue" -> "-")
+    val actual2 = df.select(to_csv($"value", options.asJava))
+    checkAnswer(actual2, Row("2,Alice," +
+      "\"[{math -> 100, english -> 200, science -> -}, " +
+      "{math -> 300, english -> 400, science -> 500}]\""))
+  }
 }
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]