Repository: incubator-carbondata
Updated Branches:
  refs/heads/master d6d5eca30 -> a1b8afaff


[CARBONDATA-794] Numeric dimension column value should be validated for the bad 
record


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/3bc9152e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/3bc9152e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/3bc9152e

Branch: refs/heads/master
Commit: 3bc9152e51b8771c68586c4e5e638be1c8acbd81
Parents: d6d5eca
Author: mohammadshahidkhan <mohdshahidkhan1...@gmail.com>
Authored: Sun Mar 19 23:21:40 2017 +0530
Committer: Venkata Ramana G <ramana.gollam...@huawei.com>
Committed: Mon Mar 20 20:27:45 2017 +0530

----------------------------------------------------------------------
 .../src/test/resources/badrecords/dummy.csv     |   4 +
 .../NumericDimensionBadRecordTest.scala         | 161 +++++++++++++++++++
 .../impl/DictionaryFieldConverterImpl.java      |  20 ++-
 .../converter/impl/FieldEncoderFactory.java     |   2 +-
 .../converter/impl/RowConverterImpl.java        |   2 +-
 5 files changed, 184 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/resources/badrecords/dummy.csv 
b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
new file mode 100644
index 0000000..39bf37f
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
@@ -0,0 +1,4 @@
+name,dob,weight
+\N,\N,1
+,,xfds
+"","",""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
 
b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
new file mode 100644
index 0000000..1301f5d
--- /dev/null
+++ 
b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.badrecordloger
+
+import java.io.File
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.hive.HiveContext
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test class for bad record validation of numeric dimension column values
+ *
+ *
+ */
+class NumericDimensionBadRecordTest extends QueryTest with BeforeAndAfterAll {
+  var hiveContext: HiveContext = _
+
+  override def beforeAll {
+    try {
+      sql("drop table IF EXISTS intDataType")
+      sql("drop table IF EXISTS longDataType")
+      sql("drop table IF EXISTS doubleDataType")
+      sql("drop table IF EXISTS floatDataType")
+      sql("drop table IF EXISTS bigDecimalDataType")
+      sql("drop table IF EXISTS stringDataType")
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+          new File("./target/test/badRecords")
+            .getCanonicalPath)
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, 
"yyyy/MM/dd")
+      var csvFilePath = ""
+
+      // 1. bad record int DataType dimension
+      sql("create table intDataType(name String, dob timestamp, weight int)" +
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table intDataType 
options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 2. bad record long DataType dimension
+      sql("create table longDataType(name String, dob timestamp, weight long)" 
+
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table 
longDataType options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 3. bad record double DataType dimension
+      sql("create table doubleDataType(name String, dob timestamp, weight 
double)" +
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table 
doubleDataType options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+      // 4. bad record float DataType dimension
+      sql("create table floatDataType(name String, dob timestamp, weight 
float)" +
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table 
floatDataType options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 5. bad record decimal DataType dimension
+      sql("create table bigDecimalDataType(name String, dob timestamp, weight 
decimal(3,1))" +
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table 
bigDecimalDataType options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+      // 6. string DataType dimension (no value is expected to be rejected as a bad record)
+      sql("create table stringDataType(name String, dob timestamp, weight 
String)" +
+          " STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table 
stringDataType options " +
+          
"('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+    } catch {
+      case x: Throwable => {
+        System.out.println(x.getMessage)
+        CarbonProperties.getInstance()
+          .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, 
"dd-MM-yyyy")
+      }
+    }
+  }
+
+   test("select count(*) from intDataType") {
+    checkAnswer(
+      sql("select count(*) from intDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from longDataType") {
+    checkAnswer(
+      sql("select count(*) from longDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+
+  test("select count(*) from doubleDataType") {
+    checkAnswer(
+      sql("select count(*) from doubleDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from floatDataType") {
+    checkAnswer(
+      sql("select count(*) from floatDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from bigDecimalDataType") {
+    checkAnswer(
+      sql("select count(*) from bigDecimalDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from stringDataType") {
+    checkAnswer(
+      sql("select count(*) from stringDataType"),
+      Seq(Row(3)
+      )
+    )
+  }
+
+  override def afterAll {
+    sql("drop table IF EXISTS intDataType")
+    sql("drop table IF EXISTS longDataType")
+    sql("drop table IF EXISTS doubleDataType")
+    sql("drop table IF EXISTS floatDataType")
+    sql("drop table IF EXISTS bigDecimalDataType")
+    sql("drop table IF EXISTS stringDataType")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
index ae5cadb..62073e2 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
@@ -55,14 +55,17 @@ public class DictionaryFieldConverterImpl extends 
AbstractDictionaryFieldConvert
 
   private DictionaryMessage dictionaryMessage;
 
+  private boolean isEmptyBadRecord;
+
   public DictionaryFieldConverterImpl(DataField dataField,
       Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache,
       CarbonTableIdentifier carbonTableIdentifier, String nullFormat, int 
index,
       DictionaryClient client, boolean useOnePass, String storePath, boolean 
tableInitialize,
-      Map<Object, Integer> localCache) throws IOException {
+      Map<Object, Integer> localCache, boolean isEmptyBadRecord) throws 
IOException {
     this.index = index;
     this.carbonDimension = (CarbonDimension) dataField.getColumn();
     this.nullFormat = nullFormat;
+    this.isEmptyBadRecord = isEmptyBadRecord;
     DictionaryColumnUniqueIdentifier identifier =
         new DictionaryColumnUniqueIdentifier(carbonTableIdentifier,
             dataField.getColumn().getColumnIdentifier(), 
dataField.getColumn().getDataType());
@@ -94,8 +97,19 @@ public class DictionaryFieldConverterImpl extends 
AbstractDictionaryFieldConvert
   @Override public void convert(CarbonRow row, BadRecordLogHolder logHolder)
       throws CarbonDataLoadingException {
     try {
-      String parsedValue = DataTypeUtil.parseValue(row.getString(index), 
carbonDimension);
-      if (null == parsedValue || parsedValue.equals(nullFormat)) {
+      String dimensionValue = row.getString(index);
+      if (dimensionValue == null || dimensionValue.equals(nullFormat)) {
+        dimensionValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL;
+      }
+      String parsedValue = DataTypeUtil.parseValue(dimensionValue, 
carbonDimension);
+      if (null == parsedValue) {
+        if ((dimensionValue.length() > 0) || (dimensionValue.length() == 0 && 
isEmptyBadRecord)) {
+          String dataType = carbonDimension.getDataType().getName();
+          logHolder.setReason(
+              "The value " + " \"" + dimensionValue + "\"" + " with column 
name " + carbonDimension
+                  .getColName() + " and column data type " + dataType + " is 
not a valid "
+                  + dataType + " type.");
+        }
         row.update(CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY, 
index);
       } else {
         row.update(dictionaryGenerator.getOrGenerateKey(parsedValue), index);

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
index 660f256..158f3f0 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
@@ -75,7 +75,7 @@ public class FieldEncoderFactory {
       } else if (dataField.getColumn().hasEncoding(Encoding.DICTIONARY) &&
           !dataField.getColumn().isComplex()) {
         return new DictionaryFieldConverterImpl(dataField, cache, 
carbonTableIdentifier, nullFormat,
-            index, client, useOnePass, storePath, tableInitialize, localCache);
+            index, client, useOnePass, storePath, tableInitialize, localCache, 
isEmptyBadRecord);
       } else if (dataField.getColumn().isComplex()) {
         return new ComplexFieldConverterImpl(
             createComplexType(dataField, cache, carbonTableIdentifier,

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
index 3ba7bdf..2471314 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
@@ -155,7 +155,7 @@ public class RowConverterImpl implements RowConverter {
       fieldConverters[i].convert(row, logHolder);
       if (!logHolder.isLogged() && logHolder.isBadRecordNotAdded()) {
         if (badRecordLogger.isDataLoadFail()) {
-          String error = "Data load failed due to bad bad record: " + 
logHolder.getReason();
+          String error = "Data load failed due to bad record: " + 
logHolder.getReason();
           throw new CarbonDataLoadingException(error);
         }
         badRecordLogger.addBadRecordsToBuilder(copy.getData(), 
logHolder.getReason());

Reply via email to