This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 41a756f [CARBONDATA-4188] Fixed select query with small table page
size after alter add column
41a756f is described below
commit 41a756fff7d8774fd919540561d1c39cfc24b9b4
Author: Nihal ojha <[email protected]>
AuthorDate: Mon May 17 15:30:24 2021 +0530
[CARBONDATA-4188] Fixed select query with small table page size after alter
add column
Why is this PR needed?
A select query on a table with a long string data type and a small page size
throws ArrayIndexOutOfBoundsException after alter add columns.
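The query fails because, after the schema change, the number of rows set for
each page in the bitSetGroup (RestructureIncludeFilterExecutorImpl.applyFilter())
is not correct: it is derived from the default page row count instead of the
actual row count of each page.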
What changes were proposed in this PR?
Set the correct number of rows inside every page of bitsetGroup.
This closes #4137
---
.../carbondata/core/scan/filter/FilterUtil.java | 29 ++++++---------
.../executer/RangeValueFilterExecutorImpl.java | 4 +-
.../RestructureExcludeFilterExecutorImpl.java | 6 +--
.../RestructureIncludeFilterExecutorImpl.java | 6 +--
...velRangeGreaterThanEqualFilterExecutorImpl.java | 4 +-
...RowLevelRangeGreaterThanFilterExecutorImpl.java | 4 +-
...wLevelRangeLessThanEqualFilterExecutorImpl.java | 4 +-
.../RowLevelRangeLessThanFilterExecutorImpl.java | 4 +-
.../core/scan/filter/FilterUtilTest.java | 43 ++++++++++++++++------
9 files changed, 52 insertions(+), 52 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
index 9dc6b5b..1acae54 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
@@ -30,7 +30,6 @@ import java.util.Set;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
@@ -81,6 +80,7 @@ import
org.apache.carbondata.core.scan.filter.resolver.RowLevelRangeFilterResolv
import
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.ColumnResolvedFilterInfo;
import
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
+import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
import org.apache.carbondata.core.scan.result.vector.CarbonDictionary;
import org.apache.carbondata.core.util.BitSetGroup;
import org.apache.carbondata.core.util.ByteUtil;
@@ -898,30 +898,23 @@ public final class FilterUtil {
}
/**
- * This method will create default bitset group. Applicable for restructure
scenarios.
+ * This method will create bit set group for particular raw blocklet column
chunk.
+ * Applicable for restructure scenarios.
*
- * @param pageCount
- * @param totalRowCount
+ * @param rawBlockletColumnChunks
* @param defaultValue
* @return
*/
- public static BitSetGroup createBitSetGroupWithDefaultValue(int pageCount,
int totalRowCount,
- boolean defaultValue) {
+ public static BitSetGroup
createBitSetGroupWithColumnChunk(RawBlockletColumnChunks
+ rawBlockletColumnChunks, boolean defaultValue) {
+ int pageCount = rawBlockletColumnChunks.getDataBlock().numberOfPages();
BitSetGroup bitSetGroup = new BitSetGroup(pageCount);
- int numberOfRows =
CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
- int pagesTobeFullFilled = totalRowCount / numberOfRows;
- int rowCountForLastPage = totalRowCount % numberOfRows;
- for (int i = 0; i < pagesTobeFullFilled; i++) {
- BitSet bitSet = new BitSet(numberOfRows);
- bitSet.set(0, numberOfRows, defaultValue);
+ for (int i = 0; i < pageCount; i++) {
+ int pageRowCount =
rawBlockletColumnChunks.getDataBlock().getPageRowCount(i);
+ BitSet bitSet = new BitSet(pageRowCount);
+ bitSet.set(0, pageRowCount, defaultValue);
bitSetGroup.setBitSet(bitSet, i);
}
- // create and fill bitset for the last page if any records are left
- if (rowCountForLastPage > 0) {
- BitSet bitSet = new BitSet(rowCountForLastPage);
- bitSet.set(0, rowCountForLastPage, defaultValue);
- bitSetGroup.setBitSet(bitSet, pagesTobeFullFilled);
- }
return bitSetGroup;
}
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecutorImpl.java
index e4daee3..86d95c9 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecutorImpl.java
@@ -375,9 +375,7 @@ public class RangeValueFilterExecutorImpl implements
FilterExecutor {
// false, in that scenario the default values of the column should be
shown.
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock) {
- int numberOfRows = blockChunkHolder.getDataBlock().numRows();
- return FilterUtil.createBitSetGroupWithDefaultValue(
- blockChunkHolder.getDataBlock().numberOfPages(), numberOfRows, true);
+ return FilterUtil.createBitSetGroupWithColumnChunk(blockChunkHolder,
true);
}
int chunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping()
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java
index 848d0f4..30bfe12 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java
@@ -50,10 +50,8 @@ public class RestructureExcludeFilterExecutorImpl extends
RestructureEvaluatorIm
@Override
public BitSetGroup applyFilter(RawBlockletColumnChunks
rawBlockletColumnChunks,
boolean useBitsetPipeLine) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
- return FilterUtil
-
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, !isDefaultValuePresentInFilterValues);
+ return FilterUtil.createBitSetGroupWithColumnChunk(rawBlockletColumnChunks,
+ !isDefaultValuePresentInFilterValues);
}
@Override
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java
index 364a2af..67801c4 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java
@@ -49,10 +49,8 @@ public class RestructureIncludeFilterExecutorImpl extends
RestructureEvaluatorIm
@Override
public BitSetGroup applyFilter(RawBlockletColumnChunks
rawBlockletColumnChunks,
boolean useBitsetPipeLine) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
- return FilterUtil.createBitSetGroupWithDefaultValue(
- rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, isDefaultValuePresentInFilterValues);
+ return FilterUtil.createBitSetGroupWithColumnChunk(rawBlockletColumnChunks,
+ isDefaultValuePresentInFilterValues);
}
@Override
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanEqualFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanEqualFilterExecutorImpl.java
index 3857a2b..8616291 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanEqualFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanEqualFilterExecutorImpl.java
@@ -217,10 +217,8 @@ public class
RowLevelRangeGreaterThanEqualFilterExecutorImpl extends RowLevelFil
boolean useBitsetPipeLine) throws IOException {
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock[0] &&
!isMeasurePresentInCurrentBlock[0]) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
return FilterUtil
-
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, true);
+ .createBitSetGroupWithColumnChunk(rawBlockletColumnChunks, true);
}
if (isDimensionPresentInCurrentBlock[0]) {
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanFilterExecutorImpl.java
index 50ce0ca..f3aff26 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGreaterThanFilterExecutorImpl.java
@@ -275,10 +275,8 @@ public class RowLevelRangeGreaterThanFilterExecutorImpl
extends RowLevelFilterEx
boolean useBitsetPipeLine) throws IOException {
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock[0] &&
!isMeasurePresentInCurrentBlock[0]) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
return FilterUtil
-
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, true);
+ .createBitSetGroupWithColumnChunk(rawBlockletColumnChunks, true);
}
if (isDimensionPresentInCurrentBlock[0]) {
int chunkIndex =
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecutorImpl.java
index f4800be..c751891 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecutorImpl.java
@@ -217,10 +217,8 @@ public class RowLevelRangeLessThanEqualFilterExecutorImpl
extends RowLevelFilter
boolean useBitsetPipeLine) throws IOException {
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock[0] &&
!isMeasurePresentInCurrentBlock[0]) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
return FilterUtil
-
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, true);
+ .createBitSetGroupWithColumnChunk(rawBlockletColumnChunks, true);
}
if (isDimensionPresentInCurrentBlock[0]) {
int chunkIndex =
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecutorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecutorImpl.java
index 125e3a9..489bb84 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecutorImpl.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecutorImpl.java
@@ -214,10 +214,8 @@ public class RowLevelRangeLessThanFilterExecutorImpl
extends RowLevelFilterExecu
boolean useBitsetPipeLine) throws IOException {
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock[0] &&
!isMeasurePresentInCurrentBlock[0]) {
- int numberOfRows = rawBlockletColumnChunks.getDataBlock().numRows();
return FilterUtil
-
.createBitSetGroupWithDefaultValue(rawBlockletColumnChunks.getDataBlock().numberOfPages(),
- numberOfRows, true);
+ .createBitSetGroupWithColumnChunk(rawBlockletColumnChunks, true);
}
if (isDimensionPresentInCurrentBlock[0]) {
int chunkIndex =
diff --git
a/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
b/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
index d15e852..94ae419 100644
---
a/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
+++
b/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
@@ -21,7 +21,12 @@ import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
+import mockit.Mock;
+import mockit.MockUp;
+
import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.DataRefNode;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
@@ -32,6 +37,7 @@ import
org.apache.carbondata.core.scan.expression.conditional.ListExpression;
import
org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
import org.apache.carbondata.core.scan.expression.logical.AndExpression;
import org.apache.carbondata.core.scan.expression.logical.TrueExpression;
+import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
import org.apache.carbondata.core.util.BitSetGroup;
import org.junit.Before;
@@ -223,17 +229,6 @@ public class FilterUtilTest {
DataTypes.STRING) instanceof ColumnFilterInfo);
}
- @Test public void testCreateBitSetGroupWithDefaultValue() {
- // test for exactly divisible values
- BitSetGroup bitSetGroupWithDefaultValue =
- FilterUtil.createBitSetGroupWithDefaultValue(14, 448000, true);
- assertTrue(bitSetGroupWithDefaultValue.getNumberOfPages() == 14);
- // test for remainder values
- bitSetGroupWithDefaultValue =
- FilterUtil.createBitSetGroupWithDefaultValue(15, 448200, true);
- assertTrue(bitSetGroupWithDefaultValue.getNumberOfPages() == 15);
- }
-
@Test public void testRemoveInExpressionNodeWithPositionIdColumn() {
List<Expression> children = new
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
// create literal expression
@@ -257,6 +252,32 @@ public class FilterUtilTest {
assert (((AndExpression) expression).getRight() instanceof TrueExpression);
}
+ @Test public void testCreateBitSetGroupWithColumnChunk() {
+ BlockletDataRefNode blockletDataRefNode = new
MockUp<BlockletDataRefNode>() {
+ @Mock
+ public int numberOfPages() {
+ return 2;
+ }
+ @Mock
+ public int getPageRowCount(int pageNumber) {
+ if (pageNumber==0) {
+ return 94;
+ } else {
+ return 6;
+ }
+ }
+ }.getMockInstance();
+ RawBlockletColumnChunks rawBlockletColumnChunks = new
MockUp<RawBlockletColumnChunks>() {
+ @Mock
+ public DataRefNode getDataBlock() {
+ return blockletDataRefNode;
+ }
+ }.getMockInstance();
+ BitSetGroup bitSetGroupWithColumnChunk =
+ FilterUtil.createBitSetGroupWithColumnChunk(rawBlockletColumnChunks,
true);
+ assertTrue(bitSetGroupWithColumnChunk.getNumberOfPages() == 2);
+ }
+
@Test public void testRemoveInExpressionNodeWithDifferentColumn() {
List<Expression> children = new
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
// create literal expression