Repository: carbondata Updated Branches: refs/heads/master 38b53fefb -> 0bbfa8597
[CARBONDATA-1854] Add support for implicit column filter Whenever a filter is applied on implicit column, filter is applied at blocklet level to scan/read only the valid blocklets. This will ensure that only blocklets that contain the required data are read. This closes #1619 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0bbfa859 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0bbfa859 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0bbfa859 Branch: refs/heads/master Commit: 0bbfa8597a17ed90f081f6d3f78d62c8c4bcc1bd Parents: 38b53fe Author: manishgupta88 <[email protected]> Authored: Tue Dec 5 16:36:56 2017 +0530 Committer: Venkata Ramana G <[email protected]> Committed: Wed Dec 6 21:39:18 2017 +0530 ---------------------------------------------------------------------- .../carbondata/core/datastore/DataRefNode.java | 8 ++ .../impl/btree/AbstractBTreeLeafNode.java | 5 + .../datastore/impl/btree/BTreeNonLeafNode.java | 5 + .../impl/btree/BlockletBTreeLeafNode.java | 18 ++++ .../blockletindex/BlockletDataMap.java | 36 ++++++- .../BlockletDataRefNodeWrapper.java | 21 +++- .../core/metadata/blocklet/BlockletInfo.java | 10 ++ .../core/scan/filter/ColumnFilterInfo.java | 20 ++++ .../carbondata/core/scan/filter/FilterUtil.java | 54 +++++++--- .../filter/executer/AndFilterExecuterImpl.java | 61 ++++++++++- .../executer/ImplicitColumnFilterExecutor.java | 47 +++++++++ .../ImplicitIncludeFilterExecutorImpl.java | 105 +++++++++++++++++++ .../visitor/FilterInfoTypeVisitorFactory.java | 4 + .../visitor/ImplicitColumnVisitor.java | 60 +++++++++++ 14 files changed, 435 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java ---------------------------------------------------------------------- 
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java index 8914196..37acb02 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java @@ -128,4 +128,12 @@ public interface DataRefNode { * @return */ int numberOfPages(); + + /** + * Return the number of rows for a given page + * + * @param pageNumber + * @return + */ + int getPageRowCount(int pageNumber); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/AbstractBTreeLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/AbstractBTreeLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/AbstractBTreeLeafNode.java index dfd35bc..19b1f1c 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/AbstractBTreeLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/AbstractBTreeLeafNode.java @@ -239,4 +239,9 @@ public abstract class AbstractBTreeLeafNode implements BTreeNode { public BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache() { return deleteDeltaDataCache; } + + @Override + public int getPageRowCount(int pageNumber) { + throw new UnsupportedOperationException("Unsupported operation"); + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java index 01c0177..62fcabf 
100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java @@ -254,4 +254,9 @@ public class BTreeNonLeafNode implements BTreeNode { // TODO Auto-generated method stub throw new UnsupportedOperationException("Unsupported operation"); } + + @Override + public int getPageRowCount(int pageNumber) { + throw new UnsupportedOperationException("Unsupported operation"); + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java index 2e52770..cf8a2ad 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java @@ -18,6 +18,7 @@ package org.apache.carbondata.core.datastore.impl.btree; import java.io.IOException; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; import org.apache.carbondata.core.datastore.BTreeBuilderInfo; import org.apache.carbondata.core.datastore.FileHolder; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; @@ -47,6 +48,8 @@ public class BlockletBTreeLeafNode extends AbstractBTreeLeafNode { */ private int numberOfPages; + private int[] pageRowCount; + /** * Create a leaf node * @@ -82,6 +85,17 @@ public class BlockletBTreeLeafNode extends AbstractBTreeLeafNode { this.nodeNumber = nodeNumber; this.numberOfPages = builderInfos.getFooterList().get(0).getBlockletList().get(leafIndex).getNumberOfPages(); + this.pageRowCount = new int[numberOfPages]; + int 
numberOfPagesCompletelyFilled = + numberOfKeys / CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + int lastPageRowCount = + numberOfKeys % CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + for (int i = 0; i < numberOfPagesCompletelyFilled; i++) { + pageRowCount[i] = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + } + if (lastPageRowCount > 0) { + pageRowCount[pageRowCount.length - 1] = lastPageRowCount; + } } /** @@ -138,4 +152,8 @@ public class BlockletBTreeLeafNode extends AbstractBTreeLeafNode { @Override public int numberOfPages() { return numberOfPages; } + + @Override public int getPageRowCount(int pageNumber) { + return this.pageRowCount[pageNumber]; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java index 43e265d..fd514ea 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java @@ -57,6 +57,7 @@ import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.DataFileFooterConverter; @@ 
-366,9 +367,12 @@ public class BlockletDataMap implements DataMap, Cacheable { FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null); while (startIndex <= endIndex) { DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(startIndex); - BitSet bitSet = filterExecuter.isScanRequired(getMinMaxValue(unsafeRow, MAX_VALUES_INDEX), - getMinMaxValue(unsafeRow, MIN_VALUES_INDEX)); - if (!bitSet.isEmpty()) { + String filePath = new String(unsafeRow.getByteArray(FILE_PATH_INDEX), + CarbonCommonConstants.DEFAULT_CHARSET_CLASS); + boolean isValid = + addBlockBasedOnMinMaxValue(filterExecuter, getMinMaxValue(unsafeRow, MAX_VALUES_INDEX), + getMinMaxValue(unsafeRow, MIN_VALUES_INDEX), filePath); + if (isValid) { blocklets.add(createBlocklet(unsafeRow, startIndex)); } startIndex++; @@ -378,6 +382,32 @@ public class BlockletDataMap implements DataMap, Cacheable { return blocklets; } + /** + * select the blocks based on column min and max value + * + * @param filterExecuter + * @param maxValue + * @param minValue + * @param filePath + * @return + */ + private boolean addBlockBasedOnMinMaxValue(FilterExecuter filterExecuter, byte[][] maxValue, + byte[][] minValue, String filePath) { + BitSet bitSet = null; + if (filterExecuter instanceof ImplicitColumnFilterExecutor) { + String uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1); + bitSet = ((ImplicitColumnFilterExecutor) filterExecuter) + .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, uniqueBlockPath); + } else { + bitSet = filterExecuter.isScanRequired(maxValue, minValue); + } + if (!bitSet.isEmpty()) { + return true; + } else { + return false; + } + } + public ExtendedBlocklet getDetailedBlocklet(String blockletId) { int index = Integer.parseInt(blockletId); DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(index); 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java index 5e0f4cf..42505ad 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.List; import org.apache.carbondata.core.cache.update.BlockletLevelDeleteDeltaDataCache; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; import org.apache.carbondata.core.datastore.DataRefNode; import org.apache.carbondata.core.datastore.FileHolder; import org.apache.carbondata.core.datastore.block.TableBlockInfo; @@ -48,10 +49,23 @@ public class BlockletDataRefNodeWrapper implements DataRefNode { int[] dimensionLens) { this.blockInfos = blockInfos; // Update row count and page count to blocklet info - for (TableBlockInfo blockInfo: blockInfos) { + for (TableBlockInfo blockInfo : blockInfos) { BlockletDetailInfo detailInfo = blockInfo.getDetailInfo(); detailInfo.getBlockletInfo().setNumberOfRows(detailInfo.getRowCount()); detailInfo.getBlockletInfo().setNumberOfPages(detailInfo.getPagesCount()); + int[] pageRowCount = new int[detailInfo.getPagesCount()]; + int numberOfPagesCompletelyFilled = detailInfo.getRowCount() + / CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + int lastPageRowCount = detailInfo.getRowCount() + % CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + for (int i = 0; i < numberOfPagesCompletelyFilled; i++) { + 
pageRowCount[i] = + CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + } + if (lastPageRowCount > 0) { + pageRowCount[pageRowCount.length - 1] = lastPageRowCount; + } + detailInfo.getBlockletInfo().setNumberOfRowsPerPage(pageRowCount); } this.index = index; this.dimensionLens = dimensionLens; @@ -138,6 +152,11 @@ public class BlockletDataRefNodeWrapper implements DataRefNode { return blockInfos.get(index).getDetailInfo().getPagesCount(); } + @Override public int getPageRowCount(int pageNumber) { + return blockInfos.get(index).getDetailInfo().getBlockletInfo() + .getNumberOfRowsPerPage()[pageNumber]; + } + public int numberOfNodes() { return blockInfos.size(); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java index 19a4923..f77358f 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java @@ -87,6 +87,8 @@ public class BlockletInfo implements Serializable, Writable { */ private int numberOfPages = 1; + private int[] numberOfRowsPerPage; + /** * @return the numberOfRows */ @@ -307,4 +309,12 @@ public class BlockletInfo implements Serializable, Writable { measureColumnChunk.add(deserializeDataChunk(bytes)); } } + + public int[] getNumberOfRowsPerPage() { + return numberOfRowsPerPage; + } + + public void setNumberOfRowsPerPage(int[] numberOfRowsPerPage) { + this.numberOfRowsPerPage = numberOfRowsPerPage; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java index 6a7fced..aeb3e6d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java @@ -18,7 +18,11 @@ package org.apache.carbondata.core.scan.filter; import java.io.Serializable; +import java.util.HashSet; import java.util.List; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; public class ColumnFilterInfo implements Serializable { @@ -33,6 +37,7 @@ public class ColumnFilterInfo implements Serializable { */ private List<String> implicitColumnFilterList; private List<Integer> excludeFilterList; + private transient Set<String> implicitDriverColumnFilterList; /** * maintain the no dictionary filter values list. 
*/ @@ -86,6 +91,7 @@ public class ColumnFilterInfo implements Serializable { public void setImplicitColumnFilterList(List<String> implicitColumnFilterList) { this.implicitColumnFilterList = implicitColumnFilterList; + populateBlockIdListForDriverBlockPruning(); } public List<Object> getMeasuresFilterValuesList() { @@ -95,4 +101,18 @@ public class ColumnFilterInfo implements Serializable { public void setMeasuresFilterValuesList(List<Object> measuresFilterValuesList) { this.measuresFilterValuesList = measuresFilterValuesList; } + + public Set<String> getImplicitDriverColumnFilterList() { + return implicitDriverColumnFilterList; + } + + private void populateBlockIdListForDriverBlockPruning() { + implicitDriverColumnFilterList = new HashSet<>(implicitColumnFilterList.size()); + String blockId = null; + for (String blockletId : implicitColumnFilterList) { + blockId = + blockletId.substring(0, blockletId.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR)); + implicitDriverColumnFilterList.add(blockId); + } + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index bb2fe7d..16b3b1f 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -73,6 +73,7 @@ import org.apache.carbondata.core.scan.filter.executer.DimColumnExecuterFilterIn import org.apache.carbondata.core.scan.filter.executer.ExcludeColGroupFilterExecuterImpl; import org.apache.carbondata.core.scan.filter.executer.ExcludeFilterExecuterImpl; import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import 
org.apache.carbondata.core.scan.filter.executer.ImplicitIncludeFilterExecutorImpl; import org.apache.carbondata.core.scan.filter.executer.IncludeColGroupFilterExecuterImpl; import org.apache.carbondata.core.scan.filter.executer.IncludeFilterExecuterImpl; import org.apache.carbondata.core.scan.filter.executer.MeasureColumnExecuterFilterInfo; @@ -225,23 +226,30 @@ public final class FilterUtil { msrColResolvedFilterInfo, true); } } - if (null != dimColResolvedFilterInfo && dimColResolvedFilterInfo.getDimension().isColumnar()) { - CarbonDimension dimensionFromCurrentBlock = - segmentProperties.getDimensionFromCurrentBlock(dimColResolvedFilterInfo.getDimension()); - if (null != dimensionFromCurrentBlock) { - // update dimension and column index according to the dimension position in current block - DimColumnResolvedFilterInfo dimColResolvedFilterInfoCopyObject = - dimColResolvedFilterInfo.getCopyObject(); - dimColResolvedFilterInfoCopyObject.setDimension(dimensionFromCurrentBlock); - dimColResolvedFilterInfoCopyObject.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); - return new IncludeFilterExecuterImpl(dimColResolvedFilterInfoCopyObject, null, - segmentProperties, false); + if (null != dimColResolvedFilterInfo) { + CarbonDimension dimension = dimColResolvedFilterInfo.getDimension(); + if (dimension.hasEncoding(Encoding.IMPLICIT)) { + return new ImplicitIncludeFilterExecutorImpl(dimColResolvedFilterInfo); + } else if (dimension.isColumnar()) { + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColResolvedFilterInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + // update dimension and column index according to the dimension position in current block + DimColumnResolvedFilterInfo dimColResolvedFilterInfoCopyObject = + dimColResolvedFilterInfo.getCopyObject(); + dimColResolvedFilterInfoCopyObject.setDimension(dimensionFromCurrentBlock); + 
dimColResolvedFilterInfoCopyObject.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + return new IncludeFilterExecuterImpl(dimColResolvedFilterInfoCopyObject, null, + segmentProperties, false); + } else { + return new RestructureIncludeFilterExecutorImpl(dimColResolvedFilterInfo, + msrColResolvedFilterInfo, false); + } } else { - return new RestructureIncludeFilterExecutorImpl(dimColResolvedFilterInfo, - msrColResolvedFilterInfo, false); + return new IncludeColGroupFilterExecuterImpl(dimColResolvedFilterInfo, segmentProperties); } } else { - return new IncludeColGroupFilterExecuterImpl(dimColResolvedFilterInfo, segmentProperties); + return new IncludeColGroupFilterExecuterImpl(null, segmentProperties); } } @@ -1783,4 +1791,22 @@ public final class FilterUtil { } } } + + /** + * This method will get the no dictionary data based on filters and same + * will be in DimColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return + */ + public static ColumnFilterInfo getImplicitColumnFilterList(List<String> evaluateResultListFinal, + boolean isIncludeFilter) { + ColumnFilterInfo columnFilterInfo = new ColumnFilterInfo(); + columnFilterInfo.setIncludeFilter(isIncludeFilter); + if (null != evaluateResultListFinal) { + columnFilterInfo.setImplicitColumnFilterList(evaluateResultListFinal); + } + return columnFilterInfo; + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java index 6b256f1..f882162 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java +++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java @@ -24,7 +24,7 @@ import org.apache.carbondata.core.scan.filter.intf.RowIntf; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; -public class AndFilterExecuterImpl implements FilterExecuter { +public class AndFilterExecuterImpl implements FilterExecuter, ImplicitColumnFilterExecutor { private FilterExecuter leftExecuter; private FilterExecuter rightExecuter; @@ -73,4 +73,63 @@ public class AndFilterExecuterImpl implements FilterExecuter { leftExecuter.readBlocks(blocksChunkHolder); rightExecuter.readBlocks(blocksChunkHolder); } + + @Override + public BitSet isFilterValuesPresentInBlockOrBlocklet(byte[][] maxValue, byte[][] minValue, + String uniqueBlockPath) { + BitSet leftFilters = null; + if (leftExecuter instanceof ImplicitColumnFilterExecutor) { + leftFilters = ((ImplicitColumnFilterExecutor) leftExecuter) + .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue,uniqueBlockPath); + } else { + leftFilters = leftExecuter + .isScanRequired(maxValue, minValue); + } + if (leftFilters.isEmpty()) { + return leftFilters; + } + BitSet rightFilter = null; + if (rightExecuter instanceof ImplicitColumnFilterExecutor) { + rightFilter = ((ImplicitColumnFilterExecutor) rightExecuter) + .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, uniqueBlockPath); + } else { + rightFilter = rightExecuter + .isScanRequired(maxValue, minValue); + } + if (rightFilter.isEmpty()) { + return rightFilter; + } + leftFilters.and(rightFilter); + return leftFilters; + } + + @Override + public Boolean isFilterValuesPresentInAbstractIndex(byte[][] maxValue, byte[][] minValue) { + Boolean leftRes; + BitSet tempFilter; + if (leftExecuter instanceof ImplicitColumnFilterExecutor) { + leftRes = ((ImplicitColumnFilterExecutor) leftExecuter) + .isFilterValuesPresentInAbstractIndex(maxValue, minValue); + } else { + tempFilter = 
leftExecuter + .isScanRequired(maxValue, minValue); + leftRes = !tempFilter.isEmpty(); + } + if (!leftRes) { + return leftRes; + } + + Boolean rightRes = null; + if (rightExecuter instanceof ImplicitColumnFilterExecutor) { + rightRes = ((ImplicitColumnFilterExecutor) rightExecuter) + .isFilterValuesPresentInAbstractIndex(maxValue, minValue); + } else { + tempFilter = rightExecuter + .isScanRequired(maxValue, minValue); + rightRes = !tempFilter.isEmpty(); + } + + // Equivalent to leftRes && rightRes. + return rightRes; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitColumnFilterExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitColumnFilterExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitColumnFilterExecutor.java new file mode 100644 index 0000000..8f2ad57 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitColumnFilterExecutor.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.scan.filter.executer; + +import java.util.BitSet; + +/** + * Implementation of this interface will involve block + * and blocklet pruning based on block/blocklet id where + * the filter values are present. + */ +public interface ImplicitColumnFilterExecutor { + + /** + * This method will validate the block or blocklet id with the implicit + * column filter value list and decide whether the required block or + * blocklet has to be scanned for the data or not + * + * @param uniqueBlockPath + * @return + */ + BitSet isFilterValuesPresentInBlockOrBlocklet(byte[][] maxValue, byte[][] minValue, + String uniqueBlockPath); + + /** + * This method will validate the abstract index + * and decide whether the index is valid for scanning or not. + * Implicit index is always considered valid as it can be decided at block level. + * + * @return + */ + Boolean isFilterValuesPresentInAbstractIndex(byte[][] maxValue, byte[][] minValue); +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java new file mode 100644 index 0000000..9c4c7ba --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.intf.RowIntf; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +/** + * This class will implement the blocklet and block pruning logic based + * on the implicit column filter values + */ +public class ImplicitIncludeFilterExecutorImpl + implements FilterExecuter, ImplicitColumnFilterExecutor { + + private final DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; + + public ImplicitIncludeFilterExecutorImpl(DimColumnResolvedFilterInfo dimColumnEvaluatorInfo) { + this.dimColumnEvaluatorInfo = dimColumnEvaluatorInfo; + } + + @Override + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean useBitsetPipeline) + throws FilterUnsupportedException { + BitSetGroup bitSetGroup = new BitSetGroup(blockChunkHolder.getDataBlock().numberOfPages()); + for (int i = 0; i < blockChunkHolder.getDataBlock().numberOfPages(); i++) { + bitSetGroup.setBitSet( + 
setBitSetForCompleteDimensionData(blockChunkHolder.getDataBlock().getPageRowCount(i)), i); + } + return bitSetGroup; + } + + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) + throws FilterUnsupportedException, IOException { + return false; + } + + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + return null; + } + + @Override public void readBlocks(BlocksChunkHolder blockChunkHolder) throws IOException { + + } + + @Override + public BitSet isFilterValuesPresentInBlockOrBlocklet(byte[][] maxValue, byte[][] minValue, + String uniqueBlockPath) { + BitSet bitSet = new BitSet(1); + boolean isScanRequired = false; + String shortBlockId = CarbonTablePath.getShortBlockId(uniqueBlockPath); + if (uniqueBlockPath.endsWith(".carbondata")) { + if (dimColumnEvaluatorInfo.getFilterValues().getImplicitDriverColumnFilterList() + .contains(shortBlockId)) { + isScanRequired = true; + } + } else if (dimColumnEvaluatorInfo.getFilterValues().getImplicitColumnFilterList() + .contains(shortBlockId)) { + isScanRequired = true; + } + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * For implicit column filtering, complete data need to be selected. 
As it is a special case + * no data need to be discarded, implicit filtering is only for selecting block and blocklets + * + * @param numberOfRows + * @return + */ + private BitSet setBitSetForCompleteDimensionData(int numberOfRows) { + BitSet bitSet = new BitSet(); + bitSet.set(0, numberOfRows, true); + return bitSet; + } + + @Override + public Boolean isFilterValuesPresentInAbstractIndex(byte[][] maxValue, byte[][] minValue) { + return true; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/FilterInfoTypeVisitorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/FilterInfoTypeVisitorFactory.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/FilterInfoTypeVisitorFactory.java index 7b69d13..11db803 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/FilterInfoTypeVisitorFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/FilterInfoTypeVisitorFactory.java @@ -35,6 +35,8 @@ public class FilterInfoTypeVisitorFactory { if (exp instanceof RangeExpression) { if (columnExpression.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { return new RangeDirectDictionaryVisitor(); + } else if (columnExpression.getDimension().hasEncoding(Encoding.IMPLICIT)) { + return new ImplicitColumnVisitor(); } else if (!columnExpression.getDimension().hasEncoding(Encoding.DICTIONARY)) { return new RangeNoDictionaryTypeVisitor(); } else if (columnExpression.getDimension().hasEncoding(Encoding.DICTIONARY)) { @@ -45,6 +47,8 @@ public class FilterInfoTypeVisitorFactory { if (null != columnExpression.getDimension()) { if (columnExpression.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { return new 
CustomTypeDictionaryVisitor(); + } else if (columnExpression.getDimension().hasEncoding(Encoding.IMPLICIT)) { + return new ImplicitColumnVisitor(); } else if (!columnExpression.getDimension().hasEncoding(Encoding.DICTIONARY)) { return new NoDictionaryTypeVisitor(); } else if (columnExpression.getDimension().hasEncoding(Encoding.DICTIONARY)) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/0bbfa859/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/ImplicitColumnVisitor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/ImplicitColumnVisitor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/ImplicitColumnVisitor.java new file mode 100644 index 0000000..32a3b12 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/ImplicitColumnVisitor.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.scan.filter.resolver.resolverinfo.visitor; + +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.ColumnFilterInfo; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.metadata.FilterResolverMetadata; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.ColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; + +public class ImplicitColumnVisitor implements ResolvedFilterInfoVisitorIntf { + + /** + * Visitor Method will update the filter related details in visitableObj. For implicit + * type columns the filter members will be resolved directly, no need to look up in dictionary + * since it will not be part of dictionary, directly the actual data can be taken + * and can be set. This type of encoding is effective when the particular column + * is having very high cardinality. + * + * @param visitableObj + * @param metadata + * @throws FilterUnsupportedException if exception occurs while evaluating + * filter models. 
+ */ + + @Override public void populateFilterResolvedInfo(ColumnResolvedFilterInfo visitableObj, + FilterResolverMetadata metadata) throws FilterUnsupportedException, IOException { + if (visitableObj instanceof DimColumnResolvedFilterInfo) { + ColumnFilterInfo resolvedFilterObject = null; + List<String> evaluateResultListFinal; + try { + evaluateResultListFinal = metadata.getExpression().evaluate(null).getListAsString(); + } catch (FilterIllegalMemberException e) { + throw new FilterUnsupportedException(e); + } + resolvedFilterObject = FilterUtil + .getImplicitColumnFilterList(evaluateResultListFinal, metadata.isIncludeFilter()); + ((DimColumnResolvedFilterInfo)visitableObj).setFilterValues(resolvedFilterObject); + } + } +}
