Repository: carbondata Updated Branches: refs/heads/carbonstore 3e1da7c3b -> 00cc601da (forced update)
[CARBONDATA-2020][Old Store Support] Add filter support for old store reading to improve query performance Problem: For old stores blocklet level min/max comparison was not happening on the executor side due to which all the blocklets were getting scanned. This increased the IO and scanning time in the executor. Solution: Modified code to retrieve the min/max value from blocklet node and use it for comparison while scanning for valid blocklets. This closes #1818 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d1d726a7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d1d726a7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d1d726a7 Branch: refs/heads/carbonstore Commit: d1d726a777c8ed848d71fa05a18dfd70b05d65bd Parents: 9550e69 Author: manishgupta88 <[email protected]> Authored: Wed Jan 17 11:27:09 2018 +0530 Committer: ravipesala <[email protected]> Committed: Thu Jan 18 22:30:23 2018 +0530 ---------------------------------------------------------------------- .../blockletindex/BlockletDataMap.java | 8 +++++-- .../BlockletDataRefNodeWrapper.java | 19 ++++++++++++++-- .../core/scan/filter/ColumnFilterInfo.java | 24 ++++++++++++++++++++ .../ImplicitIncludeFilterExecutorImpl.java | 7 +++++- .../core/scan/scanner/impl/FilterScanner.java | 18 ++++++++++++--- 5 files changed, 68 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java index ee74fad..e1fa686 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java @@ -691,8 +691,12 @@ public class BlockletDataMap implements DataMap, Cacheable { byte[][] minValue, String filePath, int blockletId) { BitSet bitSet = null; if (filterExecuter instanceof ImplicitColumnFilterExecutor) { - String uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1) - + CarbonCommonConstants.FILE_SEPARATOR + blockletId; + String uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1); + // this case will come in case of old store where index file does not contain the + // blocklet information + if (blockletId != -1) { + uniqueBlockPath = uniqueBlockPath + CarbonCommonConstants.FILE_SEPARATOR + blockletId; + } bitSet = ((ImplicitColumnFilterExecutor) filterExecuter) .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, uniqueBlockPath); } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java index dfc8a38..1585414 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java @@ -31,6 +31,7 @@ import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkRea import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader; import org.apache.carbondata.core.indexstore.BlockletDetailInfo; import 
org.apache.carbondata.core.metadata.ColumnarFormatVersion; +import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex; /** * wrapper for blocklet data map data @@ -91,11 +92,25 @@ public class BlockletDataRefNodeWrapper implements DataRefNode { return blockInfos.get(index).getDetailInfo().getBlockletId().toString(); } - @Override public byte[][] getColumnsMaxValue() { + @Override + public byte[][] getColumnsMaxValue() { + BlockletIndex blockletIndex = + blockInfos.get(index).getDetailInfo().getBlockletInfo().getBlockletIndex(); + // In case of blocklet distribution this will be null + if (null != blockletIndex) { + return blockletIndex.getMinMaxIndex().getMaxValues(); + } return null; } - @Override public byte[][] getColumnsMinValue() { + @Override + public byte[][] getColumnsMinValue() { + BlockletIndex blockletIndex = + blockInfos.get(index).getDetailInfo().getBlockletInfo().getBlockletIndex(); + // In case of blocklet distribution this will be null + if (null != blockletIndex) { + return blockletIndex.getMinMaxIndex().getMinValues(); + } return null; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java index 6a7fced..b5b6017 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java @@ -18,7 +18,11 @@ package org.apache.carbondata.core.scan.filter; import java.io.Serializable; +import java.util.HashSet; import java.util.List; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; public class ColumnFilterInfo implements Serializable { @@ -32,6 +36,7 @@ public 
class ColumnFilterInfo implements Serializable { * Implicit column filter values to be used for block and blocklet pruning */ private List<String> implicitColumnFilterList; + private transient Set<String> implicitDriverColumnFilterList; private List<Integer> excludeFilterList; /** * maintain the no dictionary filter values list. @@ -95,4 +100,23 @@ public class ColumnFilterInfo implements Serializable { public void setMeasuresFilterValuesList(List<Object> measuresFilterValuesList) { this.measuresFilterValuesList = measuresFilterValuesList; } + + public Set<String> getImplicitDriverColumnFilterList() { + // this list is required to be populated only n case of driver, so in executor this check will + // avoid unnecessary loading of the driver filter list + if (null == implicitDriverColumnFilterList) { + populateBlockIdListForDriverBlockPruning(); + } + return implicitDriverColumnFilterList; + } + + private void populateBlockIdListForDriverBlockPruning() { + implicitDriverColumnFilterList = new HashSet<>(implicitColumnFilterList.size()); + String blockId = null; + for (String blockletId : implicitColumnFilterList) { + blockId = + blockletId.substring(0, blockletId.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR)); + implicitDriverColumnFilterList.add(blockId); + } + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java index 5289e45..9c4c7ba 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java +++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java @@ -70,7 +70,12 @@ public class ImplicitIncludeFilterExecutorImpl BitSet bitSet = new BitSet(1); boolean isScanRequired = false; String shortBlockId = CarbonTablePath.getShortBlockId(uniqueBlockPath); - if (dimColumnEvaluatorInfo.getFilterValues().getImplicitColumnFilterList() + if (uniqueBlockPath.endsWith(".carbondata")) { + if (dimColumnEvaluatorInfo.getFilterValues().getImplicitDriverColumnFilterList() + .contains(shortBlockId)) { + isScanRequired = true; + } + } else if (dimColumnEvaluatorInfo.getFilterValues().getImplicitColumnFilterList() .contains(shortBlockId)) { isScanRequired = true; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java index efc0e20..79f9b49 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.scan.result.AbstractScannedResult; import org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult; @@ -107,9 +108,20 @@ public class FilterScanner extends 
AbstractBlockletScanner { totalPagesScanned.getCount() + blocksChunkHolder.getDataBlock().numberOfPages()); // apply min max if (isMinMaxEnabled) { - BitSet bitSet = this.filterExecuter - .isScanRequired(blocksChunkHolder.getDataBlock().getColumnsMaxValue(), - blocksChunkHolder.getDataBlock().getColumnsMinValue()); + BitSet bitSet = null; + // check for implicit include filter instance + if (filterExecuter instanceof ImplicitColumnFilterExecutor) { + String blockletId = blockExecutionInfo.getBlockId() + CarbonCommonConstants.FILE_SEPARATOR + + blocksChunkHolder.getDataBlock().blockletId(); + bitSet = ((ImplicitColumnFilterExecutor) filterExecuter) + .isFilterValuesPresentInBlockOrBlocklet( + blocksChunkHolder.getDataBlock().getColumnsMaxValue(), + blocksChunkHolder.getDataBlock().getColumnsMinValue(), blockletId); + } else { + bitSet = this.filterExecuter + .isScanRequired(blocksChunkHolder.getDataBlock().getColumnsMaxValue(), + blocksChunkHolder.getDataBlock().getColumnsMinValue()); + } if (bitSet.isEmpty()) { CarbonUtil.freeMemory(blocksChunkHolder.getDimensionRawDataChunk(), blocksChunkHolder.getMeasureRawDataChunk());
