Repository: incubator-carbondata Updated Branches: refs/heads/master eac728d11 -> 7ea31a6ae
Problem: Data loading fails if parsing a double value returns infinity. Analysis: During data load, if a specified value is too big for a double DataType column, parsing that value as a double returns "Infinity". Because of this, an exception is thrown when the min and max values for measures are calculated in the carbon data writer step. Fix: If parsing a double value yields Infinity or NaN, set the value to null and add it to the bad records. Impact area: Data loads that contain non-parseable values for a data type. Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/0f730162 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/0f730162 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/0f730162 Branch: refs/heads/master Commit: 0f730162dc4d93f117ac772eb910dbca6f9c9bd4 Parents: eac728d Author: manishgupta88 <tomanishgupt...@gmail.com> Authored: Thu Oct 13 15:17:52 2016 +0530 Committer: jackylk <jacky.li...@huawei.com> Committed: Fri Oct 14 21:59:51 2016 +0800 ---------------------------------------------------------------------- .../carbondata/core/util/DataTypeUtil.java | 6 +++- .../src/test/resources/invalidMeasures.csv | 3 ++ .../dataload/TestLoadDataGeneral.scala | 12 +++++++ .../csvbased/CarbonCSVBasedSeqGenStep.java | 38 ++++++++++---------- 4 files changed, 40 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index a821fb0..1af87ca 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -64,7 +64,11 @@ public final class DataTypeUtil { case LONG: return Long.valueOf(msrValue); default: - return Double.valueOf(msrValue); + Double parsedValue = Double.valueOf(msrValue); + if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + return null; + } + return parsedValue; } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/integration/spark/src/test/resources/invalidMeasures.csv ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/resources/invalidMeasures.csv b/integration/spark/src/test/resources/invalidMeasures.csv new file mode 100644 index 0000000..b573188 --- /dev/null +++ b/integration/spark/src/test/resources/invalidMeasures.csv @@ -0,0 +1,3 @@ +India,15000854676378676765378647856378567846578365786347865783456783456783465783465783465783465763478563478567834567834653750834758093478534857348578345789345789347395873483784857348573485734895789347589347589375984759389358347589737583758937589789798437893475893758934758945783475893758947589347587348957389573489758347589734589347589347589347534897589347589347583475893475893475893457893478934575489758973847583947538947583947534897349575375347398733895453444787893758345943458783497874587783597358973589785934789357895378593789357893578935789357893578935785783789357897897893789578935789357893578935789357893578937895783953789578935789357893578935789357893578935789357893789578935789357835378578357835978935357897893535789378953789578935789357893578935789,22.435 +USA,234.43,2224444444444444444444444465558999.23 +Russia,, \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala 
---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala index 9280447..4446b5e 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala @@ -20,6 +20,7 @@ package org.apache.carbondata.integration.spark.testsuite.dataload import java.io.File +import java.math.BigDecimal import org.apache.spark.sql.Row import org.apache.spark.sql.common.util.CarbonHiveContext._ @@ -60,6 +61,17 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterAll { ) } + test("test data loading with invalid values for mesasures") { + val testData = currentDirectory + "/src/test/resources/invalidMeasures.csv" + sql("drop table if exists invalidMeasures") + sql("CREATE TABLE invalidMeasures (country String, salary double, age decimal(10,2)) STORED BY 'carbondata'") + sql(s"LOAD DATA LOCAL INPATH '$testData' into table invalidMeasures options('Fileheader'='country,salary,age')") + checkAnswer( + sql("SELECT * FROM invalidMeasures"), + Seq(Row("India",null,new BigDecimal("22.44")), Row("Russia",null,null), Row("USA",234.43,null)) + ) + } + override def afterAll { sql("DROP TABLE loadtest") } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f730162/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java 
b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java index dc7dd22..8959179 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java +++ b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java @@ -994,29 +994,31 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep { } } else { try { - out[memberMapping[dimLen + index] - meta.complexTypes.size()] = - (isNull || msr == null || msr.length() == 0) ? - null : - DataTypeUtil - .getMeasureValueBasedOnDataType(msr, msrDataType[meta.msrMapping[msrCount]], - meta.carbonMeasures[meta.msrMapping[msrCount]]); - } catch (NumberFormatException e) { - try { - msr = msr.replaceAll(",", ""); - out[memberMapping[dimLen + index] - meta.complexTypes.size()] = DataTypeUtil + if (!isNull && null != msr && msr.length() > 0) { + Object measureValueBasedOnDataType = DataTypeUtil .getMeasureValueBasedOnDataType(msr, msrDataType[meta.msrMapping[msrCount]], meta.carbonMeasures[meta.msrMapping[msrCount]]); - } catch (NumberFormatException ex) { - addEntryToBadRecords(r, j, columnName, msrDataType[meta.msrMapping[msrCount]].name()); - if (badRecordConvertNullDisable) { - return null; + if (null == measureValueBasedOnDataType) { + addEntryToBadRecords(r, j, columnName, + msrDataType[meta.msrMapping[msrCount]].name()); + if (badRecordConvertNullDisable) { + return null; + } + LOGGER.warn("Cannot convert : " + msr + + " to Numeric type value. Value considered as null."); } - LOGGER.warn("Cant not convert : " + msr - + " to Numeric type value. 
Value considered as null."); - out[memberMapping[dimLen + index] - meta.complexTypes.size()] = null; + out[memberMapping[dimLen + index] - meta.complexTypes.size()] = + measureValueBasedOnDataType; } + } catch (NumberFormatException e) { + addEntryToBadRecords(r, j, columnName, msrDataType[meta.msrMapping[msrCount]].name()); + if (badRecordConvertNullDisable) { + return null; + } + LOGGER.warn( + "Cannot convert : " + msr + " to Numeric type value. Value considered as null."); + out[memberMapping[dimLen + index] - meta.complexTypes.size()] = null; } - } index++;