[48/50] [abbrv] carbondata git commit: [CARBONDATA-3084]dataload failure fix when float value exceeds the limit

ravipesala Wed, 21 Nov 2018 10:06:16 -0800

[CARBONDATA-3084]dataload failure fix when float value exceeds the limit

Problem:
When a float value exceeds the supported range and we try to insert it, the
data load fails.


Analysis:
When the value exceeds the range, the max is set to Infinity, so the decimal
count of that value is 0. Because the decimal count is zero, the codec is
chosen via selectCodecByAlgorithmForIntegral, which fails for such a value.

Solution:
When the value exceeds the range, the decimal count is zero, and the source
data type is float, select DirectCompressCodec.

This closes #2903


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6793274c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6793274c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6793274c

Branch: refs/heads/branch-1.5
Commit: 6793274c77b22f6cd6fffbe9875b68f28a58df3d
Parents: b69f0fc
Author: akashrn5 <akashnilu...@gmail.com>
Authored: Tue Nov 6 12:36:24 2018 +0530
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Wed Nov 21 22:43:46 2018 +0530

----------------------------------------------------------------------
 .../page/encoding/DefaultEncodingFactory.java   | 50 ++++++++++++--------
 .../primitiveTypes/FloatDataTypeTestCase.scala  | 14 ++++++
 2 files changed, 44 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6793274c/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
index 146d5dd..506e1c7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
@@ -325,32 +325,42 @@ public class DefaultEncodingFactory extends 
EncodingFactory {
     //Here we should use the Max abs as max to getDatatype, let's say -1 and 
-10000000, -1 is max,
     //but we can't use -1 to getDatatype, we should use -10000000.
     double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue));
-    if (decimalCount == 0) {
+    if (srcDataType == DataTypes.FLOAT && decimalCount == 0) {
+      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, 
srcDataType, maxValue,
+          minValue, decimalCount, absMaxValue);
+    } else if (decimalCount == 0) {
       // short, int, long
       return selectCodecByAlgorithmForIntegral(stats, false, columnSpec);
     } else if (decimalCount < 0 && !isComplexPrimitive) {
       return new DirectCompressCodec(DataTypes.DOUBLE);
     } else {
-      // double
-      // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression
-      if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) {
-        return new DirectCompressCodec(DataTypes.DOUBLE);
+      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, 
srcDataType, maxValue,
+          minValue, decimalCount, absMaxValue);
+    }
+  }
+
+  private static ColumnPageCodec getColumnPageCodec(SimpleStatsResult stats,
+      boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec, DataType 
srcDataType,
+      double maxValue, double minValue, int decimalCount, double absMaxValue) {
+    // double
+    // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression
+    if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) {
+      return new DirectCompressCodec(DataTypes.DOUBLE);
+    } else {
+      long max = (long) (Math.pow(10, decimalCount) * absMaxValue);
+      DataType adaptiveDataType = fitLongMinMax(max, 0);
+      DataType deltaDataType = compareMinMaxAndSelectDataType(
+          (long) (Math.pow(10, decimalCount) * (maxValue - minValue)));
+      if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) {
+        return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, 
stats,
+            isInvertedIndex(isComplexPrimitive, columnSpec));
+      } else if (adaptiveDataType.getSizeInBytes() < 
DataTypes.DOUBLE.getSizeInBytes() || (
+          (isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == 
DataTypes.DOUBLE
+              .getSizeInBytes()))) {
+        return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats,
+            isInvertedIndex(isComplexPrimitive, columnSpec));
       } else {
-        long max = (long) (Math.pow(10, decimalCount) * absMaxValue);
-        DataType adaptiveDataType = fitLongMinMax(max, 0);
-        DataType deltaDataType = compareMinMaxAndSelectDataType(
-            (long) (Math.pow(10, decimalCount) * (maxValue - minValue)));
-        if (adaptiveDataType.getSizeInBytes() > 
deltaDataType.getSizeInBytes()) {
-          return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, 
stats,
-              isInvertedIndex(isComplexPrimitive, columnSpec));
-        } else if (adaptiveDataType.getSizeInBytes() < 
DataTypes.DOUBLE.getSizeInBytes() || (
-            (isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == 
DataTypes.DOUBLE
-                .getSizeInBytes()))) {
-          return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, 
stats,
-              isInvertedIndex(isComplexPrimitive, columnSpec));
-        } else {
-          return new DirectCompressCodec(DataTypes.DOUBLE);
-        }
+        return new DirectCompressCodec(DataTypes.DOUBLE);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6793274c/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
index 076ef2c..9574cd5 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
@@ -26,6 +26,8 @@ import org.scalatest.BeforeAndAfterAll
 class FloatDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
 
   override def beforeAll {
+    sql("DROP TABLE IF EXISTS datatype_float_hive")
+    sql("DROP TABLE IF EXISTS datatype_float_byte")
     sql("DROP TABLE IF EXISTS tfloat")
     sql("""
            CREATE TABLE IF NOT EXISTS tfloat
@@ -57,7 +59,19 @@ class FloatDataTypeTestCase extends QueryTest with 
BeforeAndAfterAll {
       Seq(Row(24.56)))
   }
 
+  test("test when float range exceeds") {
+    sql("create table datatype_float_hive(f float, b byte)")
+    sql("insert into datatype_float_hive select 1.7976931348623157E308,-127")
+    sql("create table datatype_float_byte(f float, b byte) using carbon")
+    sql("insert into datatype_float_byte select 1.7976931348623157E308,-127")
+    checkAnswer(
+      sql("SELECT f FROM datatype_float_byte"),
+      sql("SELECT f FROM datatype_float_hive"))
+  }
+
   override def afterAll {
     sql("DROP TABLE IF EXISTS tfloat")
+    sql("DROP TABLE IF EXISTS datatype_float_byte")
+    sql("DROP TABLE IF EXISTS datatype_float_hive")
   }
 }
\ No newline at end of file

[48/50] [abbrv] carbondata git commit: [CARBONDATA-3084]dataload failure fix when float value exceeds the limit

Reply via email to