[CARBONDATA-1955] Delta DataType calculation is incorrect for long type Problem: For the LONG type, the delta data type was always chosen as LONG, regardless of the actual values. It should instead be chosen based on the difference (max - min) between the max and min values. Solution: Choose the delta data type from the (max - min) difference, and fall back to LONG when that subtraction overflows.
This closes #1744 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/38038add Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/38038add Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/38038add Branch: refs/heads/branch-1.3 Commit: 38038add7065a05957f84d884298c901253f3d4d Parents: 69de387 Author: mohammadshahidkhan <[email protected]> Authored: Thu Dec 28 18:12:37 2017 +0530 Committer: ravipesala <[email protected]> Committed: Thu Jan 4 20:26:31 2018 +0530 ---------------------------------------------------------------------- .../page/encoding/DefaultEncodingFactory.java | 9 +- .../page/encoding/TestEncodingFactory.java | 92 ++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/38038add/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java index 5c668be..00f7a0f 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java @@ -215,8 +215,13 @@ public class DefaultEncodingFactory extends EncodingFactory { } else if (dataType == DataTypes.INT) { value = (long) (int) max - (long) (int) min; } else if (dataType == DataTypes.LONG) { - // TODO: add overflow detection and return delta type - return DataTypes.LONG; + value = (long) max - (long) min; + // The subtraction overflowed iff the operands have opposing signs + // and the result's sign differs from the 
minuend. + boolean overflow = (((long) max ^ (long) min) & ((long) max ^ value)) < 0; + if (overflow) { + return DataTypes.LONG; + } } else if (dataType == DataTypes.DOUBLE) { return DataTypes.LONG; } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/38038add/core/src/test/java/org/apache/carbondata/core/datastore/page/encoding/TestEncodingFactory.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/datastore/page/encoding/TestEncodingFactory.java b/core/src/test/java/org/apache/carbondata/core/datastore/page/encoding/TestEncodingFactory.java new file mode 100644 index 0000000..52a4de3 --- /dev/null +++ b/core/src/test/java/org/apache/carbondata/core/datastore/page/encoding/TestEncodingFactory.java @@ -0,0 +1,92 @@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec; +import org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec; +import org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec; +import org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector; +import org.apache.carbondata.core.metadata.datatype.DataTypes; + +import junit.framework.TestCase; +import org.junit.Test; + + /** + * The class is meant to test the different type of ColumnPageCodec + * base datatype and min and max values. + */ +public class TestEncodingFactory extends TestCase { + + @Test public void testSelectProperDeltaType() { + PrimitivePageStatsCollector primitivePageStatsCollector = + PrimitivePageStatsCollector.newInstance(DataTypes.LONG); + // for Byte + primitivePageStatsCollector.update((long) Byte.MAX_VALUE); + ColumnPageCodec columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveIntegralCodec); + assert (DataTypes.BYTE == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType()); + // for Short + primitivePageStatsCollector.update((long) Short.MAX_VALUE); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveIntegralCodec); + assert (DataTypes.SHORT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType()); + // for int + primitivePageStatsCollector.update((long) Integer.MAX_VALUE); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveIntegralCodec); + assert (DataTypes.INT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType()); + // for long + 
primitivePageStatsCollector.update(Long.MAX_VALUE); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof DirectCompressCodec); + assert ("DirectCompressCodec".equals(columnPageCodec.getName())); + } + + @Test public void testSelectProperDeltaType2() { + PrimitivePageStatsCollector primitivePageStatsCollector = + PrimitivePageStatsCollector.newInstance(DataTypes.LONG); + // for Byte + primitivePageStatsCollector.update((long) 200); + ColumnPageCodec columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveDeltaIntegralCodec); + assert (DataTypes.BYTE == ((AdaptiveDeltaIntegralCodec) columnPageCodec).getTargetDataType()); + // for Short + primitivePageStatsCollector.update((long) 634767); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveIntegralCodec); + assert (DataTypes.SHORT_INT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType()); + // for int + primitivePageStatsCollector.update((long) (Integer.MAX_VALUE + 200)); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof AdaptiveIntegralCodec); + assert (DataTypes.INT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType()); + // for int + primitivePageStatsCollector.update(Long.MAX_VALUE); + columnPageCodec = + DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector); + assert (columnPageCodec instanceof DirectCompressCodec); + assert ("DirectCompressCodec".equals(columnPageCodec.getName())); + } +}
