This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 0e2d3e20cad0e7032d959c5f9107249eaa258685
Author: shivamasn <[email protected]>
AuthorDate: Thu Aug 29 11:49:41 2019 +0530

    [CARBONDATA-3508] Support CG datamap pruning fallback while querying
    
    Problem: A select query fails when a CG datamap is dropped concurrently
    while the query is filtering on the column on which the datamap was created.
    
    Solution: Catch the exception if CG datamap blocklet pruning fails and
    fall back to the blocklets already pruned by the default datamap.
    
    This closes #3369
---
 .../core/indexstore/BlockletDataMapIndexStore.java |  2 +-
 .../statusmanager/SegmentUpdateStatusManager.java  |  6 ++--
 .../datamap/bloom/BloomCoarseGrainDataMap.java     |  2 +-
 .../carbondata/hadoop/api/CarbonInputFormat.java   | 32 ++++++++++++++--------
 4 files changed, 27 insertions(+), 15 deletions(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
index 32ee9cb..fd549e0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -80,7 +80,7 @@ public class BlockletDataMapIndexStore
     return get(identifierWrapper, null);
   }
 
-  private BlockletDataMapIndexWrapper 
get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
+  public BlockletDataMapIndexWrapper 
get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
       Map<String, Map<String, BlockMetaInfo>> segInfoCache) throws IOException 
{
     TableBlockIndexUniqueIdentifier identifier =
         identifierWrapper.getTableBlockIndexUniqueIdentifier();
diff --git 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index f7083dc..bc794f4 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
@@ -27,8 +27,10 @@ import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -790,8 +792,8 @@ public class SegmentUpdateStatusManager {
     final long deltaEndTimestamp =
         getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, 
block);
 
-    List<CarbonFile> files =
-        new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+    Set<CarbonFile> files =
+        new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
 
     for (CarbonFile eachFile : allSegmentFiles) {
       String fileName = eachFile.getName();
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index fea48c3..f931353 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -232,7 +232,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
       LOGGER.warn(String.format("HitBlocklets is empty in bloom filter prune 
method. " +
               "bloomQueryModels size is %d, filterShards size if %d",
               bloomQueryModels.size(), filteredShard.size()));
-      return null;
+      return new ArrayList<>();
     }
     return new ArrayList<>(hitBlocklets);
   }
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index ac9e11e..45041e4 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -573,19 +573,29 @@ m filterExpression
       if (cgDataMapExprWrapper != null) {
         // Prune segments from already pruned blocklets
         DataMapUtil.pruneSegments(segmentIds, prunedBlocklets);
-        List<ExtendedBlocklet> cgPrunedBlocklets;
+        List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
+        boolean isCGPruneFallback = false;
         // Again prune with CG datamap.
-        if (distributedCG && dataMapJob != null) {
-          cgPrunedBlocklets = DataMapUtil
-              .executeDataMapJob(carbonTable, filter.getResolver(), 
dataMapJob, partitionsToPrune,
-                  segmentIds, invalidSegments, DataMapLevel.CG, true, new 
ArrayList<String>());
-        } else {
-          cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, 
partitionsToPrune);
+        try {
+          if (distributedCG && dataMapJob != null) {
+            cgPrunedBlocklets = DataMapUtil
+                .executeDataMapJob(carbonTable, filter.getResolver(), 
dataMapJob, partitionsToPrune,
+                    segmentIds, invalidSegments, DataMapLevel.CG, true, new 
ArrayList<String>());
+          } else {
+            cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, 
partitionsToPrune);
+          }
+        } catch (Exception e) {
+          isCGPruneFallback = true;
+          LOG.error("CG datamap pruning failed.", e);
+        }
+        // If isCGPruneFallback = true, it means that CG datamap pruning 
failed,
+        // hence no need to do intersect and simply pass the prunedBlocklets 
from default datamap
+        if (!isCGPruneFallback) {
+          // since index datamap prune in segment scope,
+          // the result need to intersect with previous pruned result
+          prunedBlocklets =
+              intersectFilteredBlocklets(carbonTable, prunedBlocklets, 
cgPrunedBlocklets);
         }
-        // since index datamap prune in segment scope,
-        // the result need to intersect with previous pruned result
-        prunedBlocklets =
-            intersectFilteredBlocklets(carbonTable, prunedBlocklets, 
cgPrunedBlocklets);
         if (ExplainCollector.enabled()) {
           ExplainCollector.recordCGDataMapPruning(
               
DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper),

Reply via email to