Repository: spark Updated Branches: refs/heads/master 29d92181d -> b93830126
[SPARK-13114][SQL] Add a test for tokens more than the fields in schema https://issues.apache.org/jira/browse/SPARK-13114 This PR adds a test for tokens more than the fields in schema. Author: hyukjinkwon <[email protected]> Closes #11020 from HyukjinKwon/SPARK-13114. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9383012 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9383012 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9383012 Branch: refs/heads/master Commit: b93830126cc59a26e2cfb5d7b3c17f9cfbf85988 Parents: 29d9218 Author: hyukjinkwon <[email protected]> Authored: Tue Feb 2 10:41:06 2016 -0800 Committer: Reynold Xin <[email protected]> Committed: Tue Feb 2 10:41:06 2016 -0800 ---------------------------------------------------------------------- sql/core/src/test/resources/cars-malformed.csv | 6 ++++++ .../spark/sql/execution/datasources/csv/CSVSuite.scala | 12 ++++++++++++ 2 files changed, 18 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/resources/cars-malformed.csv ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/cars-malformed.csv b/sql/core/src/test/resources/cars-malformed.csv new file mode 100644 index 0000000..cfa378c --- /dev/null +++ b/sql/core/src/test/resources/cars-malformed.csv @@ -0,0 +1,6 @@ +~ All the rows here are malformed having tokens more than the schema (header). +year,make,model,comment,blank +"2012","Tesla","S","No comment",,null,null + +1997,Ford,E350,"Go get one now they are going fast",,null,null +2015,Chevy,,,, http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index a79566b..fa4f137 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.types._ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { private val carsFile = "cars.csv" + private val carsMalformedFile = "cars-malformed.csv" private val carsFile8859 = "cars_iso-8859-1.csv" private val carsTsvFile = "cars.tsv" private val carsAltFile = "cars-alternative.csv" @@ -191,6 +192,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(exception.getMessage.contains("Malformed line in FAILFAST mode: 2015,Chevy,Volt")) } + test("test for tokens more than the fields in the schema") { + val cars = sqlContext + .read + .format("csv") + .option("header", "false") + .option("comment", "~") + .load(testFile(carsMalformedFile)) + + verifyCars(cars, withHeader = false, checkTypes = false) + } + test("test with null quote character") { val cars = sqlContext.read .format("csv") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
