srowen commented on a change in pull request #26027: [SPARK-24540][SQL] Support for multiple character delimiter in Spark CSV read
URL: https://github.com/apache/spark/pull/26027#discussion_r334646667
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
##########
@@ -188,6 +190,30 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData {
verifyCars(cars, withHeader = true)
}
+ test("test with tab delimiter and double quote") {
+ val cars = spark.read
+ .options(Map("quote" -> "\"", "delimiter" -> """\t""", "header" -> "true"))
+ .csv(testFile(carsTsvFile))
+
+ verifyCars(cars, numFields = 6, withHeader = true, checkHeader = false)
+ }
+
+ test("SPARK-24540: test with multiple character delimiter (comma space)") {
+ val cars = spark.read
+ .options(Map("quote" -> "\'", "delimiter" -> ", ", "header" -> "true"))
+ .csv(testFile(carsMultiCharDelimitedFile))
+
+ verifyCars(cars, withHeader = true)
+ }
+
+ test("SPARK-24540: test with multiple (crazy) character delimiter") {
+ val cars = spark.read
+ .options(Map("quote" -> "\'", "delimiter" -> """_/-\\_""", "header" -> "true"))
Review comment:
Oh right, Scala doesn't unescape it because of `"""`. This makes sense then.
Good to have more tests.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]