Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20068#discussion_r158617095 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala --- @@ -1248,4 +1248,49 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { Row("0,2013-111-11 12:13:14") :: Row(null) :: Nil ) } + + test("SPARK-17916: An empty string should not be coerced to null when nullValue is passed.") { + val sparkSession = spark + + val elems = Seq(("bar"), (""), (null: String)) + + // Checks for new behavior where an empty string is not coerced to null. + withTempDir { dir => + val outDir = new File(dir, "out").getCanonicalPath + val nullValue = "\\N" + + import sparkSession.implicits._ + val dsIn = spark.createDataset(elems) + dsIn.write + .option("nullValue", nullValue) + .csv(outDir) + val dsOut = spark.read + .option("nullValue", nullValue) + .schema(dsIn.schema) + .csv(outDir) + .as[(String)] + val computed = dsOut.collect.toSeq + val expected = Seq(("bar"), (null: String)) --- End diff -- I don't think this is quite the expected output. Could we use the examples provided in the JIRA rather than single-row ones?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org