LuciferYang commented on code in PR #45657:
URL: https://github.com/apache/spark/pull/45657#discussion_r1537145637
##########
sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala:
##########
@@ -603,4 +605,167 @@ class CsvFunctionsSuite extends QueryTest with
SharedSparkSession {
$"csv", schema_of_csv("1,2\n2"), Map.empty[String, String].asJava))
checkAnswer(actual, Row(Row(1, "2\n2")))
}
+
+ test("SPARK-47497: null value display when w or w/o options (nullValue)") {
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice", null, "y")))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("x", StringType),
+ StructField("y", StringType)))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+ val actual1 = df.select(to_csv($"value"))
+ checkAnswer(actual1, Row("2,Alice,,y"))
+
+ val options = Map("nullValue" -> "-")
+ val actual2 = df.select(to_csv($"value", options.asJava))
+ checkAnswer(actual2, Row("2,Alice,-,y"))
+ }
+
+ test("SPARK-47497: to_csv support the data of ArrayType as pretty strings") {
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice", Array(100L, 200L, null, 300L))))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("scores", ArrayType(LongType))))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+ val actual1 = df.select(to_csv($"value"))
+ checkAnswer(actual1, Row("2,Alice,\"[100, 200,, 300]\""))
+
+ val options = Map("nullValue" -> "-")
+ val actual2 = df.select(to_csv($"value", options.asJava))
+ checkAnswer(actual2, Row("2,Alice,\"[100, 200, -, 300]\""))
+ }
+
+ test("SPARK-47497: to_csv support the data of MapType as pretty strings") {
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice",
+ Map("math" -> 100L, "english" -> 200L, "science" -> null))))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("scores", MapType(StringType, LongType))))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+ val actual1 = df.select(to_csv($"value"))
+ checkAnswer(actual1, Row("2,Alice,\"{math -> 100, english -> 200, science
->}\""))
+
+ val options = Map("nullValue" -> "-")
+ val actual2 = df.select(to_csv($"value", options.asJava))
+ checkAnswer(actual2, Row("2,Alice,\"{math -> 100, english -> 200, science
-> -}\""))
+ }
+
+ test("SPARK-47497: to_csv support the data of StructType as pretty strings")
{
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice", Row(100L, 200L, null))))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("scores", StructType(Seq(
+ StructField("id1", LongType),
+ StructField("id2", LongType),
+ StructField("id3", LongType))))))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+ val actual1 = df.select(to_csv($"value"))
+ checkAnswer(actual1, Row("2,Alice,\"{100, 200,}\""))
+
+ val options = Map("nullValue" -> "-")
+ val actual2 = df.select(to_csv($"value", options.asJava))
+ checkAnswer(actual2, Row("2,Alice,\"{100, 200, -}\""))
+ }
+
+ test("SPARK-47497: to_csv support the data of BinaryType as pretty strings")
{
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice", "a".getBytes(StandardCharsets.UTF_8))))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("a", BinaryType)))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+ val actual = df.select(to_csv($"value"))
+ checkAnswer(actual, Row("2,Alice,[61]"))
+ }
+
+ test("SPARK-47497: to_csv can display NullType data") {
+ val df = Seq(Tuple1(Tuple1(null))).toDF("value")
+ val options = Map("nullValue" -> "-")
+ val actual = df.select(to_csv($"value", options.asJava))
+ checkAnswer(actual, Row("-"))
+ }
+
+ test("SPARK-47497: from_csv/to_csv does not support VariantType data") {
+ val rows = new java.util.ArrayList[Row]()
+ rows.add(Row(1L, Row(2L, "Alice", new VariantVal(Array[Byte](1, 2, 3),
Array[Byte](4, 5)))))
+
+ val valueSchema = StructType(Seq(
+ StructField("age", LongType),
+ StructField("name", StringType),
+ StructField("v", VariantType)))
+ val schema = StructType(Seq(
+ StructField("key", LongType),
+ StructField("value", valueSchema)))
+
+ val df = spark.createDataFrame(rows, schema)
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.select(to_csv($"value")).collect()
+ },
+ errorClass = "DATATYPE_MISMATCH.UNSUPPORTED_INPUT_TYPE",
+ parameters = Map(
+ "functionName" -> "`to_csv`",
+ "dataType" -> "\"STRUCT<age: BIGINT, name: STRING, v: VARIANT>\"",
+ "sqlExpr" -> "\"to_csv(value)\""),
+ context = ExpectedContext(fragment = "to_csv",
getCurrentClassCallSitePattern)
+ )
+
+ checkError(
+ exception = intercept[SparkUnsupportedOperationException] {
+ df.select(from_csv(lit("data"), valueSchema, Map.empty[String,
String])).collect()
+ },
+ errorClass = "UNSUPPORTED_DATATYPE",
+ parameters = Map("typeName" -> "\"VARIANT\"")
+ )
+ }
+
+ test("SPARK-47497: the input of to_csv must be StructType") {
Review Comment:
   What if the input is a nested structure? For example, if each item of the
Array is a Map, does the output remain consistent with what we've seen before?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]