Repository: incubator-carbondata Updated Branches: refs/heads/master 8235f82fe -> a473f553e
[CARBONDATA-317] - CSV having only space char is throwing NullPointerException Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/2963bdbf Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/2963bdbf Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/2963bdbf Branch: refs/heads/master Commit: 2963bdbfe3a666075c1856dace593a4dbd690a62 Parents: 8235f82 Author: mohammadshahidkhan <mohdshahidkhan1...@gmail.com> Authored: Thu Oct 13 23:58:52 2016 +0530 Committer: Venkata Ramana G <ramana.gollam...@huawei.com> Committed: Tue Oct 18 00:25:57 2016 +0530 ---------------------------------------------------------------------- .../spark/csv/CarbonCsvRelation.scala | 5 ++ .../resources/emptyrow/csvwithonlyspacechar.csv | 1 + .../emptyrow/TestCSVHavingOnlySpaceChar.scala | 68 ++++++++++++++++++++ 3 files changed, 74 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2963bdbf/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala index 6a8021a..cd629bf 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala @@ -33,6 +33,8 @@ import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation, TableScan import org.apache.spark.sql.types._ import org.slf4j.LoggerFactory +import org.apache.carbondata.processing.etl.DataLoadingException + case class CarbonCsvRelation protected[spark] ( location: String, useHeader: Boolean, @@ -148,6 +150,9 @@ case class CarbonCsvRelation protected[spark] ( .withSkipHeaderRecord(false) CSVParser.parse(firstLine, csvFormat).getRecords.get(0).asScala.toArray } + if(null == firstRow) { + throw new DataLoadingException("First line of the csv is not valid.") + } val header = if (useHeader) { firstRow } else { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2963bdbf/integration/spark/src/test/resources/emptyrow/csvwithonlyspacechar.csv ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/resources/emptyrow/csvwithonlyspacechar.csv b/integration/spark/src/test/resources/emptyrow/csvwithonlyspacechar.csv new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/integration/spark/src/test/resources/emptyrow/csvwithonlyspacechar.csv @@ -0,0 +1 @@ + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2963bdbf/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/emptyrow/TestCSVHavingOnlySpaceChar.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/emptyrow/TestCSVHavingOnlySpaceChar.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/emptyrow/TestCSVHavingOnlySpaceChar.scala new file mode 100644 index 0000000..82d6fdf --- /dev/null +++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/emptyrow/TestCSVHavingOnlySpaceChar.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.carbondata.integration.spark.testsuite.emptyrow + +import java.io.File + +import org.apache.spark.sql.common.util.CarbonHiveContext._ +import org.apache.spark.sql.common.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +class TestCSVHavingOnlySpaceChar extends QueryTest with BeforeAndAfterAll { + + var csvFilePath : String = null + + override def beforeAll { + sql("drop table if exists emptyRowCarbonTable") + //eid,ename,sal,presal,comm,deptno,Desc + sql( + "create table if not exists emptyRowCarbonTable (eid int,ename String,sal decimal,presal " + + "decimal,comm decimal" + + "(37,37),deptno decimal(18,2),Desc String) STORED BY 'org.apache.carbondata.format'" + ) + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/mm/dd") + val currentDirectory = new File(this.getClass.getResource("/").getPath + "/../../") + .getCanonicalPath + csvFilePath = currentDirectory + "/src/test/resources/emptyrow/csvwithonlyspacechar.csv" + } + + + test("dataload") { + try { + sql( + s"""LOAD DATA INPATH '$csvFilePath' INTO table emptyRowCarbonTable OPTIONS('DELIMITER'=',','QUOTECHAR'='"')""") + } catch { + case e: Throwable => + System.out.println(e.getMessage) + assert(e.getMessage.contains("First line of the csv is not valid.")) + } + } + + override def afterAll { + sql("drop table emptyRowCarbonTable") + sql("drop table emptyRowHiveTable") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + } +}