[CARBONDATA-2779] Fixed filter query issue in case of V1/V2 format store Problem: Filter query is failing for V1/V2 carbondata store
Root Cause: in V1 store measure min max was not added in blockminmaxindex in executor when filter is applied min max pruning is failing with array index out of bound exception Solution: Need to add min max for measure column same as already handled in driver block pruning This closes #2550 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0a6fe088 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0a6fe088 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0a6fe088 Branch: refs/heads/branch-1.4 Commit: 0a6fe088941e4281f979503347223da048a47828 Parents: 68e203a Author: kumarvishal09 <[email protected]> Authored: Tue Jul 24 20:10:54 2018 +0530 Committer: ravipesala <[email protected]> Committed: Tue Jul 31 00:11:26 2018 +0530 ---------------------------------------------------------------------- .../indexstore/blockletindex/IndexWrapper.java | 8 +- .../executor/impl/AbstractQueryExecutor.java | 95 ++++++++++---------- 2 files changed, 50 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0a6fe088/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java index 1de3122..9588f57 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java @@ -16,7 +16,6 @@ */ package org.apache.carbondata.core.indexstore.blockletindex; -import java.io.IOException; import java.util.List; import org.apache.carbondata.core.datastore.block.AbstractIndex; @@ -34,12 
+33,11 @@ public class IndexWrapper extends AbstractIndex { private List<TableBlockInfo> blockInfos; - public IndexWrapper(List<TableBlockInfo> blockInfos) throws IOException { + public IndexWrapper(List<TableBlockInfo> blockInfos, SegmentProperties segmentProperties) { this.blockInfos = blockInfos; - segmentProperties = new SegmentProperties(blockInfos.get(0).getDetailInfo().getColumnSchemas(), - blockInfos.get(0).getDetailInfo().getDimLens()); + this.segmentProperties = segmentProperties; dataRefNode = new BlockletDataRefNode(blockInfos, 0, - segmentProperties.getDimensionColumnsValueSize()); + this.segmentProperties.getDimensionColumnsValueSize()); } @Override public void buildIndex(List<DataFileFooter> footerList) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/0a6fe088/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index c8c8a0f..5b67921 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -40,13 +40,11 @@ import org.apache.carbondata.core.datastore.IndexKey; import org.apache.carbondata.core.datastore.block.AbstractIndex; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.block.TableBlockInfo; -import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier; import org.apache.carbondata.core.indexstore.BlockletDetailInfo; import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode; import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper; import 
org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.memory.UnsafeMemoryManager; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; import org.apache.carbondata.core.metadata.datatype.DataType; @@ -65,6 +63,7 @@ import org.apache.carbondata.core.scan.model.ProjectionMeasure; import org.apache.carbondata.core.scan.model.QueryModel; import org.apache.carbondata.core.stats.QueryStatistic; import org.apache.carbondata.core.stats.QueryStatisticsConstants; +import org.apache.carbondata.core.util.BlockletDataMapUtil; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; import org.apache.carbondata.core.util.CarbonUtil; @@ -128,13 +127,7 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { // so block will be loaded in sorted order this will be required for // query execution Collections.sort(queryModel.getTableBlockInfos()); - - List<AbstractIndex> indexList = new ArrayList<>(); - Map<String, List<TableBlockInfo>> listMap = getFilePathToTableBlockInfoMapping(queryModel); - for (List<TableBlockInfo> tableBlockInfos : listMap.values()) { - indexList.add(new IndexWrapper(tableBlockInfos)); - } - queryProperties.dataBlocks = indexList; + queryProperties.dataBlocks = getDataBlocks(queryModel); queryStatistic .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis()); queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic); @@ -180,25 +173,27 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { } /** - * Method to prepare file path to table block Info mapping + * Method returns the block(s) on which query will get executed * * @param queryModel * @return * @throws IOException */ - private Map<String, 
List<TableBlockInfo>> getFilePathToTableBlockInfoMapping( - QueryModel queryModel) throws IOException { + private List<AbstractIndex> getDataBlocks(QueryModel queryModel) throws IOException { Map<String, List<TableBlockInfo>> listMap = new LinkedHashMap<>(); - // thsi is introduced to handle the case when CACHE_LEVEL=BLOCK and there are few other dataMaps + // this is introduced to handle the case when CACHE_LEVEL=BLOCK and there are few other dataMaps // like lucene, Bloom created on the table. In that case all the dataMaps will do blocklet // level pruning and blockInfo entries will be repeated with different blockletIds Map<String, DataFileFooter> filePathToFileFooterMapping = new HashMap<>(); + Map<String, SegmentProperties> filePathToSegmentPropertiesMap = new HashMap<>(); for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) { List<TableBlockInfo> tableBlockInfos = listMap.get(blockInfo.getFilePath()); if (tableBlockInfos == null) { tableBlockInfos = new ArrayList<>(); listMap.put(blockInfo.getFilePath(), tableBlockInfos); } + SegmentProperties segmentProperties = + filePathToSegmentPropertiesMap.get(blockInfo.getFilePath()); BlockletDetailInfo blockletDetailInfo = blockInfo.getDetailInfo(); // This case can come in 2 scenarios: // 1. old stores (1.1 or any prior version to 1.1) where blocklet information is not @@ -206,30 +201,45 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { // 2. CACHE_LEVEL is set to block // 3. 
CACHE_LEVEL is BLOCKLET but filter column min/max is not cached in driver if (blockletDetailInfo.getBlockletInfo() == null || blockletDetailInfo - .isUseMinMaxForPruning()) { - readAndFillBlockletInfo(filePathToFileFooterMapping, tableBlockInfos, blockInfo, - blockletDetailInfo); + .isUseMinMaxForPruning()) { + blockInfo.setBlockOffset(blockletDetailInfo.getBlockFooterOffset()); + DataFileFooter fileFooter = filePathToFileFooterMapping.get(blockInfo.getFilePath()); + if (null == fileFooter) { + blockInfo.setDetailInfo(null); + fileFooter = CarbonUtil.readMetadatFile(blockInfo); + filePathToFileFooterMapping.put(blockInfo.getFilePath(), fileFooter); + blockInfo.setDetailInfo(blockletDetailInfo); + } + if (null == segmentProperties) { + segmentProperties = new SegmentProperties(fileFooter.getColumnInTable(), + blockInfo.getDetailInfo().getDimLens()); + filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties); + } + readAndFillBlockletInfo(tableBlockInfos, blockInfo, + blockletDetailInfo, fileFooter, segmentProperties); } else { + if (null == segmentProperties) { + segmentProperties = new SegmentProperties(blockInfo.getDetailInfo().getColumnSchemas(), + blockInfo.getDetailInfo().getDimLens()); + filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties); + } tableBlockInfos.add(blockInfo); } } - return listMap; + List<AbstractIndex> indexList = new ArrayList<>(); + for (List<TableBlockInfo> tableBlockInfos : listMap.values()) { + indexList.add(new IndexWrapper(tableBlockInfos, + filePathToSegmentPropertiesMap.get(tableBlockInfos.get(0).getFilePath()))); + } + return indexList; } /** * Read the file footer of block file and get the blocklets to query */ - private void readAndFillBlockletInfo(Map<String, DataFileFooter> filePathToFileFooterMapping, - List<TableBlockInfo> tableBlockInfos, TableBlockInfo blockInfo, - BlockletDetailInfo blockletDetailInfo) throws IOException { - 
blockInfo.setBlockOffset(blockletDetailInfo.getBlockFooterOffset()); - DataFileFooter fileFooter = filePathToFileFooterMapping.get(blockInfo.getFilePath()); - if (null == fileFooter) { - blockInfo.setDetailInfo(null); - fileFooter = CarbonUtil.readMetadatFile(blockInfo); - filePathToFileFooterMapping.put(blockInfo.getFilePath(), fileFooter); - blockInfo.setDetailInfo(blockletDetailInfo); - } + private void readAndFillBlockletInfo(List<TableBlockInfo> tableBlockInfos, + TableBlockInfo blockInfo, BlockletDetailInfo blockletDetailInfo, DataFileFooter fileFooter, + SegmentProperties segmentProperties) { List<BlockletInfo> blockletList = fileFooter.getBlockletList(); // cases when blockletID will be -1 // 1. In case of legacy store @@ -241,12 +251,12 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { // fill the info only for given blockletId in detailInfo BlockletInfo blockletInfo = blockletList.get(blockletDetailInfo.getBlockletId()); fillBlockletInfoToTableBlock(tableBlockInfos, blockInfo, blockletDetailInfo, fileFooter, - blockletInfo, blockletDetailInfo.getBlockletId()); + blockletInfo, blockletDetailInfo.getBlockletId(), segmentProperties); } else { short count = 0; for (BlockletInfo blockletInfo : blockletList) { fillBlockletInfoToTableBlock(tableBlockInfos, blockInfo, blockletDetailInfo, fileFooter, - blockletInfo, count); + blockletInfo, count, segmentProperties); count++; } } @@ -254,7 +264,7 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { private void fillBlockletInfoToTableBlock(List<TableBlockInfo> tableBlockInfos, TableBlockInfo blockInfo, BlockletDetailInfo blockletDetailInfo, DataFileFooter fileFooter, - BlockletInfo blockletInfo, short blockletId) { + BlockletInfo blockletInfo, short blockletId, SegmentProperties segmentProperties) { TableBlockInfo info = blockInfo.copy(); BlockletDetailInfo detailInfo = info.getDetailInfo(); // set column schema details @@ -263,14 +273,14 @@ public abstract 
class AbstractQueryExecutor<E> implements QueryExecutor<E> { byte[][] maxValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues(); byte[][] minValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues(); if (blockletDetailInfo.isLegacyStore()) { + minValues = BlockletDataMapUtil.updateMinValues(segmentProperties, + blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues()); + maxValues = BlockletDataMapUtil.updateMaxValues(segmentProperties, + blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues()); // update min and max values in case of old store for measures as min and max is written // opposite for measures in old store ( store <= 1.1 version) - maxValues = CarbonUtil.updateMinMaxValues(fileFooter, - blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues(), - blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues(), false); - minValues = CarbonUtil.updateMinMaxValues(fileFooter, - blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues(), - blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues(), true); + maxValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false); + minValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true); info.setDataBlockFromOldStore(true); } blockletInfo.getBlockletIndex().getMinMaxIndex().setMaxValues(maxValues); @@ -281,17 +291,6 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> { tableBlockInfos.add(info); } - private List<TableBlockUniqueIdentifier> prepareTableBlockUniqueIdentifier( - List<TableBlockInfo> tableBlockInfos, AbsoluteTableIdentifier absoluteTableIdentifier) { - List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers = - new ArrayList<>(tableBlockInfos.size()); - for (TableBlockInfo blockInfo : tableBlockInfos) { - tableBlockUniqueIdentifiers - .add(new TableBlockUniqueIdentifier(absoluteTableIdentifier, blockInfo)); - } - return tableBlockUniqueIdentifiers; - } - protected 
List<BlockExecutionInfo> getBlockExecutionInfos(QueryModel queryModel) throws IOException, QueryExecutionException { initQuery(queryModel);
