Github user crafty-coder commented on a diff in the pull request:
https://github.com/apache/spark/pull/20949#discussion_r203286908
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
---
@@ -512,6 +513,43 @@ class CSVSuite extends QueryTest with SharedSQLContext
with SQLTestUtils with Te
}
}
+ test("SPARK-19018: Save csv with custom charset") {
+
+ // scalastyle:off nonascii
+ val content = "µß áâä ÁÂÄ"
+ // scalastyle:on nonascii
+
+ Seq("iso-8859-1", "utf-8", "utf-16", "utf-32", "windows-1250").foreach { encoding =>
+ withTempDir { dir =>
+ val csvDir = new File(dir, "csv")
+
+ val originalDF = Seq(content).toDF("_c0").repartition(1)
+ originalDF.write
+ .option("encoding", encoding)
+ .csv(csvDir.getCanonicalPath)
+
+ csvDir.listFiles().filter(_.getName.endsWith("csv")).foreach({ csvFile =>
+ val readback = Files.readAllBytes(csvFile.toPath)
+ val expected = (content + "\n").getBytes(Charset.forName(encoding))
--- End diff --
Good Point!
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]