[CARBONDATA-3084] Data load failure fix when float value exceeds the limit. Problem: When a float value exceeds the float range and we try to insert that data, the data load fails.
Analysis: When the value exceeds the range, the max is set to Infinity, so the decimal count of that value is 0. When the decimal count is zero we go for selectCodecByAlgorithmForIntegral, which fails. Solution: When the value exceeds the range, the decimal count is zero, and the source data type is float, select DirectCompressCodec. This closes #2903 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6793274c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6793274c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6793274c Branch: refs/heads/branch-1.5 Commit: 6793274c77b22f6cd6fffbe9875b68f28a58df3d Parents: b69f0fc Author: akashrn5 <akashnilu...@gmail.com> Authored: Tue Nov 6 12:36:24 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Wed Nov 21 22:43:46 2018 +0530 ---------------------------------------------------------------------- .../page/encoding/DefaultEncodingFactory.java | 50 ++++++++++++-------- .../primitiveTypes/FloatDataTypeTestCase.scala | 14 ++++++ 2 files changed, 44 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6793274c/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java index 146d5dd..506e1c7 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java @@ -325,32 +325,42 @@ public class DefaultEncodingFactory extends EncodingFactory { //Here we should use the Max 
abs as max to getDatatype, let's say -1 and -10000000, -1 is max, //but we can't use -1 to getDatatype, we should use -10000000. double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue)); - if (decimalCount == 0) { + if (srcDataType == DataTypes.FLOAT && decimalCount == 0) { + return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType, maxValue, + minValue, decimalCount, absMaxValue); + } else if (decimalCount == 0) { // short, int, long return selectCodecByAlgorithmForIntegral(stats, false, columnSpec); } else if (decimalCount < 0 && !isComplexPrimitive) { return new DirectCompressCodec(DataTypes.DOUBLE); } else { - // double - // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression - if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) { - return new DirectCompressCodec(DataTypes.DOUBLE); + return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType, maxValue, + minValue, decimalCount, absMaxValue); + } + } + + private static ColumnPageCodec getColumnPageCodec(SimpleStatsResult stats, + boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec, DataType srcDataType, + double maxValue, double minValue, int decimalCount, double absMaxValue) { + // double + // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression + if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) { + return new DirectCompressCodec(DataTypes.DOUBLE); + } else { + long max = (long) (Math.pow(10, decimalCount) * absMaxValue); + DataType adaptiveDataType = fitLongMinMax(max, 0); + DataType deltaDataType = compareMinMaxAndSelectDataType( + (long) (Math.pow(10, decimalCount) * (maxValue - minValue))); + if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) { + return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, stats, + isInvertedIndex(isComplexPrimitive, columnSpec)); + } else if (adaptiveDataType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes() || ( + 
(isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == DataTypes.DOUBLE + .getSizeInBytes()))) { + return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats, + isInvertedIndex(isComplexPrimitive, columnSpec)); } else { - long max = (long) (Math.pow(10, decimalCount) * absMaxValue); - DataType adaptiveDataType = fitLongMinMax(max, 0); - DataType deltaDataType = compareMinMaxAndSelectDataType( - (long) (Math.pow(10, decimalCount) * (maxValue - minValue))); - if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) { - return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, stats, - isInvertedIndex(isComplexPrimitive, columnSpec)); - } else if (adaptiveDataType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes() || ( - (isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == DataTypes.DOUBLE - .getSizeInBytes()))) { - return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats, - isInvertedIndex(isComplexPrimitive, columnSpec)); - } else { - return new DirectCompressCodec(DataTypes.DOUBLE); - } + return new DirectCompressCodec(DataTypes.DOUBLE); } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6793274c/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala index 076ef2c..9574cd5 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala +++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala @@ -26,6 +26,8 @@ import org.scalatest.BeforeAndAfterAll class FloatDataTypeTestCase extends QueryTest with BeforeAndAfterAll { override def beforeAll { + sql("DROP TABLE IF EXISTS datatype_float_hive") + sql("DROP TABLE IF EXISTS datatype_float_byte") sql("DROP TABLE IF EXISTS tfloat") sql(""" CREATE TABLE IF NOT EXISTS tfloat @@ -57,7 +59,19 @@ class FloatDataTypeTestCase extends QueryTest with BeforeAndAfterAll { Seq(Row(24.56))) } + test("test when float range exceeds") { + sql("create table datatype_float_hive(f float, b byte)") + sql("insert into datatype_float_hive select 1.7976931348623157E308,-127") + sql("create table datatype_float_byte(f float, b byte) using carbon") + sql("insert into datatype_float_byte select 1.7976931348623157E308,-127") + checkAnswer( + sql("SELECT f FROM datatype_float_byte"), + sql("SELECT f FROM datatype_float_hive")) + } + override def afterAll { sql("DROP TABLE IF EXISTS tfloat") + sql("DROP TABLE IF EXISTS datatype_float_byte") + sql("DROP TABLE IF EXISTS datatype_float_hive") } } \ No newline at end of file