Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2410#discussion_r199047302
--- Diff:
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java ---
@@ -433,46 +434,57 @@ protected Expression
getFilterPredicates(Configuration configuration) {
// First prune using default datamap on driver side.
DataMapExprWrapper dataMapExprWrapper = DataMapChooser
.getDefaultDataMap(getOrCreateCarbonTable(job.getConfiguration()),
resolver);
- List<ExtendedBlocklet> prunedBlocklets =
+ List<ExtendedBlocklet> finalPrunedBlocklets =
dataMapExprWrapper.prune(segmentIds, partitionsToPrune);
-
ExplainCollector.recordDefaultDataMapPruning(
- dataMapExprWrapper.getDataMapSchema(), prunedBlocklets.size());
+ dataMapExprWrapper.getDataMapSchema(),
finalPrunedBlocklets.size());
+ if (finalPrunedBlocklets.size() == 0) {
+ return finalPrunedBlocklets;
+ }
DataMapChooser chooser = new
DataMapChooser(getOrCreateCarbonTable(job.getConfiguration()));
// Get the available CG datamaps and prune further.
DataMapExprWrapper cgDataMapExprWrapper =
chooser.chooseCGDataMap(resolver);
if (cgDataMapExprWrapper != null) {
// Prune segments from already pruned blocklets
- pruneSegments(segmentIds, prunedBlocklets);
+ pruneSegments(segmentIds, finalPrunedBlocklets);
+ List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
// Again prune with CG datamap.
if (distributedCG && dataMapJob != null) {
- prunedBlocklets = DataMapUtil
+ cgPrunedBlocklets = DataMapUtil
.executeDataMapJob(carbonTable, resolver, segmentIds,
cgDataMapExprWrapper, dataMapJob,
partitionsToPrune);
} else {
- prunedBlocklets = cgDataMapExprWrapper.prune(segmentIds,
partitionsToPrune);
+ cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds,
partitionsToPrune);
}
-
+ // since index datamaps prune at segment scope,
+ // the result needs to be intersected with the previous pruned result
+ finalPrunedBlocklets = (List) CollectionUtils.intersection(
+ finalPrunedBlocklets, cgPrunedBlocklets);
ExplainCollector.recordCGDataMapPruning(
- cgDataMapExprWrapper.getDataMapSchema(), prunedBlocklets.size());
+ cgDataMapExprWrapper.getDataMapSchema(),
finalPrunedBlocklets.size());
+ }
+
+ if (finalPrunedBlocklets.size() == 0) {
+ return finalPrunedBlocklets;
}
// Now try to prune with FG DataMap.
if (isFgDataMapPruningEnable(job.getConfiguration()) && dataMapJob !=
null) {
DataMapExprWrapper fgDataMapExprWrapper =
chooser.chooseFGDataMap(resolver);
if (fgDataMapExprWrapper != null) {
// Prune segments from already pruned blocklets
- pruneSegments(segmentIds, prunedBlocklets);
- prunedBlocklets = DataMapUtil
+ pruneSegments(segmentIds, finalPrunedBlocklets);
+ List<ExtendedBlocklet> fgPrunedBlocklets = DataMapUtil
.executeDataMapJob(carbonTable, resolver, segmentIds,
fgDataMapExprWrapper, dataMapJob,
partitionsToPrune);
-
+ finalPrunedBlocklets = (List) CollectionUtils.intersection(
+ finalPrunedBlocklets, fgPrunedBlocklets);
ExplainCollector.recordFGDataMapPruning(
- fgDataMapExprWrapper.getDataMapSchema(),
prunedBlocklets.size());
+ fgDataMapExprWrapper.getDataMapSchema(),
finalPrunedBlocklets.size());
}
} // TODO: add a else branch to push FGDataMap pruning to reader side
--- End diff --
This TODO can be removed now
---