This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 6e788be [CARBONDATA-3508] Support CG datamap pruning fallback while
querying
6e788be is described below
commit 6e788bea897fa5f265a4849214a139204f2fb10a
Author: shivamasn <[email protected]>
AuthorDate: Thu Aug 29 11:49:41 2019 +0530
[CARBONDATA-3508] Support CG datamap pruning fallback while querying
Problem: A select query fails when the CG datamap is dropped concurrently
while the query is running with a filter on the column on which the datamap was created.
Solution: Catch the exception thrown when CG datamap blocklet pruning
fails and fall back to using only the blocklets already pruned by the
default datamap.
This closes #3369
---
.../core/indexstore/BlockletDataMapIndexStore.java | 2 +-
.../statusmanager/SegmentUpdateStatusManager.java | 6 ++--
.../datamap/bloom/BloomCoarseGrainDataMap.java | 2 +-
.../carbondata/hadoop/api/CarbonInputFormat.java | 32 ++++++++++++++--------
4 files changed, 27 insertions(+), 15 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
index 32ee9cb..fd549e0 100644
---
a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
+++
b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -80,7 +80,7 @@ public class BlockletDataMapIndexStore
return get(identifierWrapper, null);
}
- private BlockletDataMapIndexWrapper
get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
+ public BlockletDataMapIndexWrapper
get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
Map<String, Map<String, BlockMetaInfo>> segInfoCache) throws IOException
{
TableBlockIndexUniqueIdentifier identifier =
identifierWrapper.getTableBlockIndexUniqueIdentifier();
diff --git
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index f7083dc..bc794f4 100644
---
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
@@ -27,8 +27,10 @@ import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -790,8 +792,8 @@ public class SegmentUpdateStatusManager {
final long deltaEndTimestamp =
getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT,
block);
- List<CarbonFile> files =
- new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+ Set<CarbonFile> files =
+ new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
for (CarbonFile eachFile : allSegmentFiles) {
String fileName = eachFile.getName();
diff --git
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index fea48c3..f931353 100644
---
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -232,7 +232,7 @@ public class BloomCoarseGrainDataMap extends
CoarseGrainDataMap {
LOGGER.warn(String.format("HitBlocklets is empty in bloom filter prune
method. " +
"bloomQueryModels size is %d, filterShards size if %d",
bloomQueryModels.size(), filteredShard.size()));
- return null;
+ return new ArrayList<>();
}
return new ArrayList<>(hitBlocklets);
}
diff --git
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index ac9e11e..45041e4 100644
---
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -573,19 +573,29 @@ m filterExpression
if (cgDataMapExprWrapper != null) {
// Prune segments from already pruned blocklets
DataMapUtil.pruneSegments(segmentIds, prunedBlocklets);
- List<ExtendedBlocklet> cgPrunedBlocklets;
+ List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
+ boolean isCGPruneFallback = false;
// Again prune with CG datamap.
- if (distributedCG && dataMapJob != null) {
- cgPrunedBlocklets = DataMapUtil
- .executeDataMapJob(carbonTable, filter.getResolver(),
dataMapJob, partitionsToPrune,
- segmentIds, invalidSegments, DataMapLevel.CG, true, new
ArrayList<String>());
- } else {
- cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds,
partitionsToPrune);
+ try {
+ if (distributedCG && dataMapJob != null) {
+ cgPrunedBlocklets = DataMapUtil
+ .executeDataMapJob(carbonTable, filter.getResolver(),
dataMapJob, partitionsToPrune,
+ segmentIds, invalidSegments, DataMapLevel.CG, true, new
ArrayList<String>());
+ } else {
+ cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds,
partitionsToPrune);
+ }
+ } catch (Exception e) {
+ isCGPruneFallback = true;
+ LOG.error("CG datamap pruning failed.", e);
+ }
+ // If isCGPruneFallback = true, it means that CG datamap pruning
failed,
+ // hence no need to do intersect and simply pass the prunedBlocklets
from default datamap
+ if (!isCGPruneFallback) {
+ // since index datamap prune in segment scope,
+ // the result need to intersect with previous pruned result
+ prunedBlocklets =
+ intersectFilteredBlocklets(carbonTable, prunedBlocklets,
cgPrunedBlocklets);
}
- // since index datamap prune in segment scope,
- // the result need to intersect with previous pruned result
- prunedBlocklets =
- intersectFilteredBlocklets(carbonTable, prunedBlocklets,
cgPrunedBlocklets);
if (ExplainCollector.enabled()) {
ExplainCollector.recordCGDataMapPruning(
DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper),