Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20068#discussion_r158617095
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
---
@@ -1248,4 +1248,49 @@ class CSVSuite extends QueryTest with
SharedSQLContext with SQLTestUtils {
Row("0,2013-111-11 12:13:14") :: Row(null) :: Nil
)
}
+
+ test("SPARK-17916: An empty string should not be coerced to null when nullValue is passed.") {
+ val sparkSession = spark
+
+ val elems = Seq(("bar"), (""), (null: String))
+
+ // Checks for new behavior where an empty string is not coerced to null.
+ withTempDir { dir =>
+ val outDir = new File(dir, "out").getCanonicalPath
+ val nullValue = "\\N"
+
+ import sparkSession.implicits._
+ val dsIn = spark.createDataset(elems)
+ dsIn.write
+ .option("nullValue", nullValue)
+ .csv(outDir)
+ val dsOut = spark.read
+ .option("nullValue", nullValue)
+ .schema(dsIn.schema)
+ .csv(outDir)
+ .as[(String)]
+ val computed = dsOut.collect.toSeq
+ val expected = Seq(("bar"), (null: String))
--- End diff --
I don't think this is quite the expected output. Could we use the examples
provided in the JIRA (SPARK-17916) rather than single-row ones?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]