Repository: incubator-carbondata
Updated Branches:
  refs/heads/master eac728d11 -> 7ea31a6ae


Problem: Data loading fails if parsing a double value returns infinity

Analysis: During data load, if a value specified for a double DataType column 
is too big to be represented as a double, parsing it returns "Infinity". As a 
result, an exception is thrown when the min and max values for measures are 
calculated in the carbon data writer step.

Fix: If parsing a double value yields Infinity or NaN, set the value to null 
and add it to bad records.

Impact area: Data loads that contain non-parseable values for a data type.


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/0f730162
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/0f730162
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/0f730162

Branch: refs/heads/master
Commit: 0f730162dc4d93f117ac772eb910dbca6f9c9bd4
Parents: eac728d
Author: manishgupta88 <tomanishgupt...@gmail.com>
Authored: Thu Oct 13 15:17:52 2016 +0530
Committer: jackylk <jacky.li...@huawei.com>
Committed: Fri Oct 14 21:59:51 2016 +0800

----------------------------------------------------------------------
 .../carbondata/core/util/DataTypeUtil.java      |  6 +++-
 .../src/test/resources/invalidMeasures.csv      |  3 ++
 .../dataload/TestLoadDataGeneral.scala          | 12 +++++++
 .../csvbased/CarbonCSVBasedSeqGenStep.java      | 38 ++++++++++----------
 4 files changed, 40 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index a821fb0..1af87ca 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -64,7 +64,11 @@ public final class DataTypeUtil {
       case LONG:
         return Long.valueOf(msrValue);
       default:
-        return Double.valueOf(msrValue);
+        Double parsedValue = Double.valueOf(msrValue);
+        if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) {
+          return null;
+        }
+        return parsedValue;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/integration/spark/src/test/resources/invalidMeasures.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/invalidMeasures.csv 
b/integration/spark/src/test/resources/invalidMeasures.csv
new file mode 100644
index 0000000..b573188
--- /dev/null
+++ b/integration/spark/src/test/resources/invalidMeasures.csv
@@ -0,0 +1,3 @@
+India,15000854676378676765378647856378567846578365786347865783456783456783465783465783465783465763478563478567834567834653750834758093478534857348578345789345789347395873483784857348573485734895789347589347589375984759389358347589737583758937589789798437893475893758934758945783475893758947589347587348957389573489758347589734589347589347589347534897589347589347583475893475893475893457893478934575489758973847583947538947583947534897349575375347398733895453444787893758345943458783497874587783597358973589785934789357895378593789357893578935789357893578935785783789357897897893789578935789357893578935789357893578937895783953789578935789357893578935789357893578935789357893789578935789357835378578357835978935357897893535789378953789578935789357893578935789,22.435
+USA,234.43,2224444444444444444444444465558999.23
+Russia,,
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
 
b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index 9280447..4446b5e 100644
--- 
a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ 
b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -20,6 +20,7 @@
 package org.apache.carbondata.integration.spark.testsuite.dataload
 
 import java.io.File
+import java.math.BigDecimal
 
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.common.util.CarbonHiveContext._
@@ -60,6 +61,17 @@ class TestLoadDataGeneral extends QueryTest with 
BeforeAndAfterAll {
     )
   }
 
+  test("test data loading with invalid values for mesasures") {
+    val testData = currentDirectory + "/src/test/resources/invalidMeasures.csv"
+    sql("drop table if exists invalidMeasures")
+    sql("CREATE TABLE invalidMeasures (country String, salary double, age 
decimal(10,2)) STORED BY 'carbondata'")
+    sql(s"LOAD DATA LOCAL INPATH '$testData' into table invalidMeasures 
options('Fileheader'='country,salary,age')")
+    checkAnswer(
+      sql("SELECT * FROM invalidMeasures"),
+      Seq(Row("India",null,new BigDecimal("22.44")), Row("Russia",null,null), 
Row("USA",234.43,null))
+    )
+  }
+
   override def afterAll {
     sql("DROP TABLE loadtest")
   }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
 
b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
index dc7dd22..8959179 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
@@ -994,29 +994,31 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
           }
         } else {
           try {
-            out[memberMapping[dimLen + index] - meta.complexTypes.size()] =
-                (isNull || msr == null || msr.length() == 0) ?
-                    null :
-                    DataTypeUtil
-                        .getMeasureValueBasedOnDataType(msr, 
msrDataType[meta.msrMapping[msrCount]],
-                            meta.carbonMeasures[meta.msrMapping[msrCount]]);
-          } catch (NumberFormatException e) {
-            try {
-              msr = msr.replaceAll(",", "");
-              out[memberMapping[dimLen + index] - meta.complexTypes.size()] = 
DataTypeUtil
+            if (!isNull && null != msr && msr.length() > 0) {
+              Object measureValueBasedOnDataType = DataTypeUtil
                   .getMeasureValueBasedOnDataType(msr, 
msrDataType[meta.msrMapping[msrCount]],
                       meta.carbonMeasures[meta.msrMapping[msrCount]]);
-            } catch (NumberFormatException ex) {
-              addEntryToBadRecords(r, j, columnName, 
msrDataType[meta.msrMapping[msrCount]].name());
-              if (badRecordConvertNullDisable) {
-                return null;
+              if (null == measureValueBasedOnDataType) {
+                addEntryToBadRecords(r, j, columnName,
+                    msrDataType[meta.msrMapping[msrCount]].name());
+                if (badRecordConvertNullDisable) {
+                  return null;
+                }
+                LOGGER.warn("Cannot convert : " + msr
+                    + " to Numeric type value. Value considered as null.");
               }
-              LOGGER.warn("Cant not convert : " + msr
-                  + " to Numeric type value. Value considered as null.");
-              out[memberMapping[dimLen + index] - meta.complexTypes.size()] = 
null;
+              out[memberMapping[dimLen + index] - meta.complexTypes.size()] =
+                  measureValueBasedOnDataType;
             }
+          } catch (NumberFormatException e) {
+            addEntryToBadRecords(r, j, columnName, 
msrDataType[meta.msrMapping[msrCount]].name());
+            if (badRecordConvertNullDisable) {
+              return null;
+            }
+            LOGGER.warn(
+                "Cannot convert : " + msr + " to Numeric type value. Value 
considered as null.");
+            out[memberMapping[dimLen + index] - meta.complexTypes.size()] = 
null;
           }
-
         }
 
         index++;

Reply via email to