marchpure commented on a change in pull request #3986:
URL: https://github.com/apache/carbondata/pull/3986#discussion_r505670979
##
File path:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean
checkDeleteDeltaFilesInSeg(Segment seg,
return blockLists;
}
+ private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+ SegmentUpdateStatusManager segmentUpdateStatusManager, int
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+ segmentUpdateStatusManager.getDeleteDeltaFilesForSegment(seg);
+
+List blockNameList =
+ segmentUpdateStatusManager.getBlockNameFromSegment(seg.getSegmentNo());
+
+Set uniqueBlocks = new HashSet();
+for (final String blockName : blockNameList) {
+
+ List deleteDeltaFiles =
blockAndDeleteDeltaFilesMap.get(blockName);
+
+ if (null != deleteDeltaFiles) {
+for (CarbonFile blocks : deleteDeltaFiles) {
Review comment:
if (delteDeltaFiles.size < threshold) continue
##
File path:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1039,22 +1039,10 @@ private static boolean
isSegmentValid(LoadMetadataDetails seg) {
if (CompactionType.IUD_DELETE_DELTA == compactionTypeIUD) {
int numberDeleteDeltaFilesThreshold =
CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
- List deleteSegments = new ArrayList<>();
for (Segment seg : segments) {
-if (checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager,
Review comment:
remove checkDeleteDeltaFilesInSeg function
##
File path:
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
return null;
}
+ public Map> getDeleteDeltaFilesForSegment(final
Segment seg) {
Review comment:
remove getDeleteDeltaFilesList function
##
File path:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean
checkDeleteDeltaFilesInSeg(Segment seg,
return blockLists;
}
+ private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+ SegmentUpdateStatusManager segmentUpdateStatusManager, int
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+ segmentUpdateStatusManager.getDeleteDeltaFilesForSegment(seg);
+
+List blockNameList =
+ segmentUpdateStatusManager.getBlockNameFromSegment(seg.getSegmentNo());
+
+Set uniqueBlocks = new HashSet();
+for (final String blockName : blockNameList) {
+
+ List deleteDeltaFiles =
blockAndDeleteDeltaFilesMap.get(blockName);
+
+ if (null != deleteDeltaFiles) {
+for (CarbonFile blocks : deleteDeltaFiles) {
Review comment:
blocks -> block
##
File path:
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
return null;
}
+ public Map> getDeleteDeltaFilesForSegment(final
Segment seg) {
+String segmentPath = CarbonTablePath.getSegmentPath(
+ identifier.getTablePath(), seg.getSegmentNo());
+CarbonFile segDir = FileFactory.getCarbonFile(segmentPath);
+CarbonFile[] allDeleteDeltaFilesOfSegment = segDir.listFiles(new
CarbonFileFilter() {
+ @Override
+ public boolean accept(CarbonFile pathName) {
+String fileName = pathName.getName();
+return (pathName.getSize() > 0) &&
Review comment:
getSize() will trigger one S3 IO.
remove getsSize()
##
File path:
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
return null;
}
+ public Map> getDeleteDeltaFilesForSegment(final
Segment seg) {
+String segmentPath = CarbonTablePath.getSegmentPath(
+ identifier.getTablePath(), seg.getSegmentNo());
Review comment:
if SegmentUpdateDetails donot contains seg, we shall return empty
result directly.
which can save a lot of IO overhead
##
File path:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean
checkDeleteDeltaFilesInSeg(Segment seg,
return blockLists;
}
+ private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+ SegmentUpdateStatusManager segmentUpdateStatusManager, int
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+