Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20727#discussion_r172362385
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
---
@@ -172,6 +174,43 @@ class TextSuite extends QueryTest with
SharedSQLContext {
}
}
+ def testLineSeparator(lineSep: String): Unit = {
+ test(s"SPARK-23577: Support line separator - lineSep: '$lineSep'") {
+ // Read
+ val values = Seq("a", "b", "\nc")
+ val data = values.mkString(lineSep)
+ val dataWithTrailingLineSep = s"$data$lineSep"
+ Seq(data, dataWithTrailingLineSep).foreach { lines =>
+ withTempPath { path =>
+ Files.write(path.toPath, lines.getBytes(StandardCharsets.UTF_8))
+ val df = spark.read.option("lineSep",
lineSep).text(path.getAbsolutePath)
+ checkAnswer(df, Seq("a", "b", "\nc").toDF())
+ }
+ }
+
+ // Write
+ withTempPath { path =>
+ values.toDF().coalesce(1)
+ .write.option("lineSep", lineSep).text(path.getAbsolutePath)
+ val partFile = Utils.recursiveList(path).filter(f =>
f.getName.startsWith("part-")).head
+ val readBack = new String(Files.readAllBytes(partFile.toPath),
StandardCharsets.UTF_8)
+ assert(readBack === s"a${lineSep}b${lineSep}\nc${lineSep}")
+ }
+
+ // Roundtrip
+ withTempPath { path =>
+ val df = values.toDF()
+ df.write.option("lineSep", lineSep).text(path.getAbsolutePath)
+ val readBack = spark.read.option("lineSep",
lineSep).text(path.getAbsolutePath)
+ checkAnswer(df, readBack)
+ }
+ }
+ }
+
+ Seq("|", "^", "::", "!!!@3").foreach { lineSep =>
--- End diff --
Sure, sounds like a good idea.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]