[CARBONDATA-2975] Default value selection and removeNullValues on range filters are incorrect
This closes #2770 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6aa2a90b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6aa2a90b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6aa2a90b Branch: refs/heads/branch-1.5 Commit: 6aa2a90bf113132880694e305a264cdccc4c5693 Parents: 18fbdfc Author: dhatchayani <[email protected]> Authored: Wed Sep 26 20:49:06 2018 +0530 Committer: ravipesala <[email protected]> Committed: Thu Oct 4 17:22:14 2018 +0530 ---------------------------------------------------------------------- .../apache/carbondata/core/scan/filter/FilterUtil.java | 11 ++++++----- .../filter/executer/RangeValueFilterExecuterImpl.java | 2 +- .../executer/RowLevelRangeGrtThanFiterExecuterImpl.java | 10 ++++++++-- .../RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java | 10 ++++++++-- .../RowLevelRangeLessThanEqualFilterExecuterImpl.java | 11 +++++++---- .../RowLevelRangeLessThanFilterExecuterImpl.java | 11 +++++++---- .../spark/testsuite/sortcolumns/TestSortColumns.scala | 6 ++++++ 7 files changed, 43 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index b4354d2..fe92c42 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -1947,11 +1947,12 @@ public final class FilterUtil { public static void removeNullValues(DimensionColumnPage dimensionColumnPage, BitSet bitSet, byte[] defaultValue) { if (!bitSet.isEmpty()) { - if (null != 
dimensionColumnPage.getNullBits() && !dimensionColumnPage.getNullBits().isEmpty() - && !dimensionColumnPage.isExplicitSorted() && !dimensionColumnPage.isAdaptiveEncoded()) { - for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { - if (dimensionColumnPage.getNullBits().get(i)) { - bitSet.flip(i); + if (null != dimensionColumnPage.getNullBits()) { + if (!dimensionColumnPage.getNullBits().isEmpty()) { + for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { + if (dimensionColumnPage.getNullBits().get(i)) { + bitSet.flip(i); + } } } } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index b9729db..886a13b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -632,7 +632,7 @@ public class RangeValueFilterExecuterImpl implements FilterExecuter { } else { if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - } else { + } else if (!dimensionColumnPage.isAdaptiveEncoded()) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index 63a5976..c6835f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -28,6 +28,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; @@ -387,9 +388,14 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute } else { bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows); } + byte[] defaultValue = null; + if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { + defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } else if (!dimensionColumnPage.isAdaptiveEncoded()) { + defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; + } if (dimensionColumnPage.isNoDicitionaryColumn()) { - FilterUtil.removeNullValues(dimensionColumnPage, bitSet, - CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY); + FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java index 0f9cfae..afb646a 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java @@ -28,6 +28,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; @@ -386,9 +387,14 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte } else { bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows); } + byte[] defaultValue = null; + if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { + defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } else if (!dimensionColumnPage.isAdaptiveEncoded()) { + defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; + } if (dimensionColumnPage.isNoDicitionaryColumn()) { - FilterUtil.removeNullValues(dimensionColumnPage, bitSet, - CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY); + FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java index eff6509..647425d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java @@ -383,13 +383,16 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter BitSet bitSet = null; if (dimensionColumnPage.isExplicitSorted()) { bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows, - defaultValue); + dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue); } else { - bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, defaultValue); + bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, + dimensionColumnPage.isAdaptiveEncoded() ? 
null : defaultValue); + } + if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { + defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } if (dimensionColumnPage.isNoDicitionaryColumn()) { - FilterUtil.removeNullValues(dimensionColumnPage, bitSet, - CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY); + FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java index 7c48180..4ef9999 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java @@ -380,13 +380,16 @@ public class RowLevelRangeLessThanFilterExecuterImpl extends RowLevelFilterExecu BitSet bitSet = null; if (dimensionColumnPage.isExplicitSorted()) { bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows, - defaultValue); + dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue); } else { - bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, defaultValue); + bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, + dimensionColumnPage.isAdaptiveEncoded() ? 
null : defaultValue); + } + if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { + defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } if (dimensionColumnPage.isNoDicitionaryColumn()) { - FilterUtil.removeNullValues(dimensionColumnPage, bitSet, - CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY); + FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala index d9cb0e0..8d4dba3 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala @@ -343,6 +343,12 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll { sql(s"LOAD DATA local inpath '$resourcesPath/numeric_column_invalid_values.csv' INTO TABLE test_sort_col OPTIONS('FILEHEADER'='id,name,age')") // compare hive and carbon data checkAnswer(sql("select * from test_sort_col_hive"), sql("select * from test_sort_col")) + checkAnswer(sql("select * from test_sort_col_hive where age < 25"), sql("select * from test_sort_col where age < 25")) + checkAnswer(sql("select * from test_sort_col_hive where age <= 25"), sql("select * from test_sort_col where age <= 25")) + checkAnswer(sql("select * from test_sort_col_hive where age > 25"), sql("select * from test_sort_col where age > 25")) + checkAnswer(sql("select * from test_sort_col_hive where age >= 25"), 
sql("select * from test_sort_col where age >= 25")) + checkAnswer(sql("select * from test_sort_col_hive where age is null"), sql("select * from test_sort_col where age is null")) + checkAnswer(sql("select * from test_sort_col_hive where age is not null"), sql("select * from test_sort_col where age is not null")) } test("describe formatted for sort_columns") {
