spark git commit: [SPARK-13114][SQL] Add a test for tokens more than the fields in schema

rxin Tue, 02 Feb 2016 10:41:25 -0800

Repository: spark
Updated Branches:
  refs/heads/master 29d92181d -> b93830126



[SPARK-13114][SQL] Add a test for tokens more than the fields in schema

https://issues.apache.org/jira/browse/SPARK-13114

This PR adds a test for tokens more than the fields in schema.

Author: hyukjinkwon <[email protected]>

Closes #11020 from HyukjinKwon/SPARK-13114.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9383012
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9383012
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9383012

Branch: refs/heads/master
Commit: b93830126cc59a26e2cfb5d7b3c17f9cfbf85988
Parents: 29d9218
Author: hyukjinkwon <[email protected]>
Authored: Tue Feb 2 10:41:06 2016 -0800
Committer: Reynold Xin <[email protected]>
Committed: Tue Feb 2 10:41:06 2016 -0800

----------------------------------------------------------------------
 sql/core/src/test/resources/cars-malformed.csv          |  6 ++++++
 .../spark/sql/execution/datasources/csv/CSVSuite.scala  | 12 ++++++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/resources/cars-malformed.csv
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/cars-malformed.csv b/sql/core/src/test/resources/cars-malformed.csv
new file mode 100644
index 0000000..cfa378c
--- /dev/null
+++ b/sql/core/src/test/resources/cars-malformed.csv
@@ -0,0 +1,6 @@
+~ All the rows here are malformed having tokens more than the schema (header).
+year,make,model,comment,blank
+"2012","Tesla","S","No comment",,null,null
+
+1997,Ford,E350,"Go get one now they are going fast",,null,null
+2015,Chevy,,,,

http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index a79566b..fa4f137 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.types._
 
 class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
   private val carsFile = "cars.csv"
+  private val carsMalformedFile = "cars-malformed.csv"
   private val carsFile8859 = "cars_iso-8859-1.csv"
   private val carsTsvFile = "cars.tsv"
   private val carsAltFile = "cars-alternative.csv"
@@ -191,6 +192,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     assert(exception.getMessage.contains("Malformed line in FAILFAST mode: 2015,Chevy,Volt"))
   }
 
+  test("test for tokens more than the fields in the schema") {
+    val cars = sqlContext
+      .read
+      .format("csv")
+      .option("header", "false")
+      .option("comment", "~")
+      .load(testFile(carsMalformedFile))
+
+    verifyCars(cars, withHeader = false, checkTypes = false)
+  }
+
   test("test with null quote character") {
     val cars = sqlContext.read
       .format("csv")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-13114][SQL] Add a test for tokens more than the fields in schema

Reply via email to