Github user qiuchenjian commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2713#discussion_r242387613
--- Diff:
datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
---
@@ -218,56 +218,46 @@ public DataMapBuilder createBuilder(Segment segment,
String shardName,
this.bloomFilterSize, this.bloomFilterFpp, bloomCompress);
}
- /**
- * returns all shard directories of bloom index files for query
- * if bloom index files are merged we should get only one shard path
- */
- private Set<String> getAllShardPaths(String tablePath, String segmentId)
{
- String dataMapStorePath = CarbonTablePath.getDataMapStorePath(
- tablePath, segmentId, dataMapName);
- CarbonFile[] carbonFiles =
FileFactory.getCarbonFile(dataMapStorePath).listFiles();
- Set<String> shardPaths = new HashSet<>();
+
+ private boolean isAllShardsMerged(String dmSegmentPath) {
+ boolean mergeShardExist = false;
boolean mergeShardInprogress = false;
- CarbonFile mergeShardFile = null;
+ CarbonFile[] carbonFiles =
FileFactory.getCarbonFile(dmSegmentPath).listFiles();
for (CarbonFile carbonFile : carbonFiles) {
- if
(carbonFile.getName().equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME))
{
- mergeShardFile = carbonFile;
- } else if
(carbonFile.getName().equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) {
+ String fileName = carbonFile.getName();
+ if
(fileName.equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME)) {
+ mergeShardExist = true;
+ } else if
(fileName.equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) {
mergeShardInprogress = true;
- } else if (carbonFile.isDirectory()) {
-
shardPaths.add(FileFactory.getPath(carbonFile.getAbsolutePath()).toString());
}
}
- if (mergeShardFile != null && !mergeShardInprogress) {
- // should only get one shard path if mergeShard is generated
successfully
- shardPaths.clear();
-
shardPaths.add(FileFactory.getPath(mergeShardFile.getAbsolutePath()).toString());
- }
- return shardPaths;
+ return mergeShardExist && !mergeShardInprogress;
}
@Override
public List<CoarseGrainDataMap> getDataMaps(Segment segment) throws
IOException {
List<CoarseGrainDataMap> dataMaps = new ArrayList<>();
try {
- Set<String> shardPaths = segmentMap.get(segment.getSegmentNo());
- if (shardPaths == null) {
- shardPaths = getAllShardPaths(getCarbonTable().getTablePath(),
segment.getSegmentNo());
- segmentMap.put(segment.getSegmentNo(), shardPaths);
- }
- Set<String> filteredShards = segment.getFilteredIndexShardNames();
- for (String shard : shardPaths) {
- if
(shard.endsWith(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME) ||
- filteredShards.contains(new File(shard).getName())) {
- // Filter out the tasks which are filtered through Main datamap.
- // for merge shard, shard pruning delay to be done before
pruning blocklet
- BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap();
- bloomDM.init(new BloomDataMapModel(shard, cache,
segment.getConfiguration()));
- bloomDM.initIndexColumnConverters(getCarbonTable(),
dataMapMeta.getIndexedColumns());
- bloomDM.setFilteredShard(filteredShards);
- dataMaps.add(bloomDM);
- }
+ String dmSegmentPath = CarbonTablePath.getDataMapStorePath(
+ getCarbonTable().getTablePath(), segment.getSegmentNo(),
dataMapName);
+ boolean useMergeShard = isAllShardsMerged(dmSegmentPath);
+
+ // make use of filtered shard info from default datamap to build
bloom datamap
+ BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap();
+ bloomDM.init(new BloomDataMapModel(dmSegmentPath, cache,
FileFactory.getConfiguration()));
+ bloomDM.initIndexColumnConverters(getCarbonTable(),
dataMapMeta.getIndexedColumns());
+ bloomDM.setFilteredShard(segment.getFilteredIndexShardNames(),
useMergeShard);
+ dataMaps.add(bloomDM);
+
+ // save shard info for clearing cache
+ Set<String> shardPaths = new HashSet<>();
+ if (useMergeShard) {
+ shardPaths.add(dmSegmentPath + File.separator +
+ BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME);
+ } else {
+ shardPaths.addAll(segment.getFilteredIndexShardNames());
}
+ segmentMap.put(segment.getSegmentNo(), shardPaths);
--- End diff --
segmentMap was used to cache the shardPaths, but now it's useless. I don't think
it's necessary to get shardPaths.
It's OK to change segmentMap to a Set that just stores the segment numbers.
---