jeff303 commented on a change in pull request #26027: [SPARK-24540][SQL] Support for multiple character delimiter in Spark CSV read
URL: https://github.com/apache/spark/pull/26027#discussion_r334645127
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
 ##########
 @@ -188,6 +190,30 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData {
     verifyCars(cars, withHeader = true)
   }
 
+  test("test with tab delimiter and double quote") {
+    val cars = spark.read
+        .options(Map("quote" -> "\"", "delimiter" -> """\t""", "header" -> "true"))
+        .csv(testFile(carsTsvFile))
+
+    verifyCars(cars, numFields = 6, withHeader = true, checkHeader = false)
+  }
+
+  test("SPARK-24540: test with multiple character delimiter (comma space)") {
+    val cars = spark.read
+        .options(Map("quote" -> "\'", "delimiter" -> ", ", "header" -> "true"))
+        .csv(testFile(carsMultiCharDelimitedFile))
+
+    verifyCars(cars, withHeader = true)
+  }
+
+  test("SPARK-24540: test with multiple (crazy) character delimiter") {
+    val cars = spark.read
+        .options(Map("quote" -> "\'", "delimiter" -> """_/-\\_""", "header" -> "true"))
 
 Review comment:
   I added additional assertions for the rest of the columns (besides year). The delimiter is given as `_/-\\_`; the doubled backslash is needed because the fourth character of the intended delimiter is a literal backslash. After unescaping (via the new utility method), we end up with `_/-\_` as the String that is passed into the Univocity parser.
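   
   To make the escaping concrete, here is a minimal, self-contained sketch of the unescaping step described above. The `unescapeDelimiter` helper and its name are hypothetical (the PR's actual utility method may support more escape sequences and live elsewhere); it only illustrates how `\t` and `\\` in the user-supplied option value become a tab and a single backslash before the delimiter reaches the parser.

```scala
// Hypothetical sketch of delimiter unescaping, as described in the comment
// above; the PR's real utility method may differ in name and scope.
object DelimiterUnescapeSketch {
  def unescapeDelimiter(raw: String): String = {
    val sb = new StringBuilder
    var i = 0
    while (i < raw.length) {
      if (raw.charAt(i) == '\\' && i + 1 < raw.length) {
        raw.charAt(i + 1) match {
          case 't' => sb.append('\t')                  // "\t" -> tab character
          case '\\' => sb.append('\\')                 // "\\" -> one literal backslash
          case other => sb.append('\\').append(other)  // pass unknown escapes through
        }
        i += 2
      } else {
        sb.append(raw.charAt(i))
        i += 1
      }
    }
    sb.toString
  }

  def main(args: Array[String]): Unit = {
    // The option value """_/-\\_""" holds the six characters _ / - \ \ _ ;
    // the escaped pair collapses, so the parser sees the five characters _/-\_
    assert(unescapeDelimiter("""_/-\\_""") == "_/-\\_") // right side is _/-\_ (one backslash)
    assert(unescapeDelimiter("""\t""") == "\t")         // becomes an actual tab
    println(unescapeDelimiter("""_/-\\_"""))            // prints _/-\_
  }
}
```

   With unescaping handled this way, a delimiter containing a backslash can be written in a test (or user code) exactly as the quoted diff does, via a triple-quoted Scala string such as `"""_/-\\_"""`.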
