This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/carbondata.git
commit 0e2d3e20cad0e7032d959c5f9107249eaa258685
Author: shivamasn <[email protected]>
AuthorDate: Thu Aug 29 11:49:41 2019 +0530

    [CARBONDATA-3508] Support CG datamap pruning fallback while querying

    Problem: A select query fails when the CG datamap is dropped concurrently
    while the select query is running on the filter column on which the
    datamap was created.

    Solution: Handle the exception from datamap blocklet pruning if it fails,
    and consider only the pruned blocklets from the default datamap pruning.

    This closes #3369
---
 .../core/indexstore/BlockletDataMapIndexStore.java |  2 +-
 .../statusmanager/SegmentUpdateStatusManager.java  |  6 ++--
 .../datamap/bloom/BloomCoarseGrainDataMap.java     |  2 +-
 .../carbondata/hadoop/api/CarbonInputFormat.java   | 32 ++++++++++++++--------
 4 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
index 32ee9cb..fd549e0 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -80,7 +80,7 @@ public class BlockletDataMapIndexStore
     return get(identifierWrapper, null);
   }
 
-  private BlockletDataMapIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
+  public BlockletDataMapIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
       Map<String, Map<String, BlockMetaInfo>> segInfoCache) throws IOException {
     TableBlockIndexUniqueIdentifier identifier =
         identifierWrapper.getTableBlockIndexUniqueIdentifier();
diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index f7083dc..bc794f4 100644
--- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
@@ -27,8 +27,10 @@ import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -790,8 +792,8 @@ public class SegmentUpdateStatusManager {
     final long deltaEndTimestamp =
         getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
 
-    List<CarbonFile> files =
-        new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+    Set<CarbonFile> files =
+        new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
 
     for (CarbonFile eachFile : allSegmentFiles) {
       String fileName = eachFile.getName();
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index fea48c3..f931353 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -232,7 +232,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
       LOGGER.warn(String.format("HitBlocklets is empty in bloom filter prune method. "
           + "bloomQueryModels size is %d, filterShards size if %d",
           bloomQueryModels.size(), filteredShard.size()));
-      return null;
+      return new ArrayList<>();
     }
     return new ArrayList<>(hitBlocklets);
   }
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index ac9e11e..45041e4 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -573,19 +573,29 @@ m filterExpression
     if (cgDataMapExprWrapper != null) {
       // Prune segments from already pruned blocklets
       DataMapUtil.pruneSegments(segmentIds, prunedBlocklets);
-      List<ExtendedBlocklet> cgPrunedBlocklets;
+      List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
+      boolean isCGPruneFallback = false;
       // Again prune with CG datamap.
-      if (distributedCG && dataMapJob != null) {
-        cgPrunedBlocklets = DataMapUtil
-            .executeDataMapJob(carbonTable, filter.getResolver(), dataMapJob, partitionsToPrune,
-                segmentIds, invalidSegments, DataMapLevel.CG, true, new ArrayList<String>());
-      } else {
-        cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
+      try {
+        if (distributedCG && dataMapJob != null) {
+          cgPrunedBlocklets = DataMapUtil
+              .executeDataMapJob(carbonTable, filter.getResolver(), dataMapJob, partitionsToPrune,
+                  segmentIds, invalidSegments, DataMapLevel.CG, true, new ArrayList<String>());
+        } else {
+          cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
+        }
+      } catch (Exception e) {
+        isCGPruneFallback = true;
+        LOG.error("CG datamap pruning failed.", e);
+      }
+      // If isCGPruneFallback = true, it means that CG datamap pruning failed,
+      // hence no need to do intersect and simply pass the prunedBlocklets from default datamap
+      if (!isCGPruneFallback) {
+        // since index datamap prune in segment scope,
+        // the result need to intersect with previous pruned result
+        prunedBlocklets =
+            intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
       }
-      // since index datamap prune in segment scope,
-      // the result need to intersect with previous pruned result
-      prunedBlocklets =
-          intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
       if (ExplainCollector.enabled()) {
         ExplainCollector.recordCGDataMapPruning(
             DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper),
