[GitHub] [carbondata] marchpure commented on a change in pull request #3986: [CARBONDATA-4034] Improve the time-consuming of Horizontal Compaction for update

2020-10-16 Thread GitBox


marchpure commented on a change in pull request #3986:
URL: https://github.com/apache/carbondata/pull/3986#discussion_r506261693



##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1246,8 +1209,22 @@ public static boolean isHorizontalCompactionEnabled() {
 // set the update status.
 segmentUpdateStatusManager.setUpdateStatusDetails(segmentUpdateDetails);
 
-CarbonFile[] deleteDeltaFiles =
-segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg), 
blockName);
+// only when SegmentUpdateDetails contain the specified block
+// will the method getDeleteDeltaFilesList be executed
+List blockNameList = 
segmentUpdateStatusManager.getBlockNameFromSegment(seg);
+Map> blockAndDeleteDeltaFilesMap = new 
HashMap<>();
+CarbonFile[] deleteDeltaFiles = null;
+if (blockNameList.contains(blockName)) {
+  blockAndDeleteDeltaFilesMap =
+  segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg));
+}
+if (blockAndDeleteDeltaFilesMap.containsKey(blockName)) {
+  List deleteDeltaFileList = 
blockAndDeleteDeltaFilesMap.get(blockName);
+  deleteDeltaFiles = deleteDeltaFileList.toArray(new 
CarbonFile[deleteDeltaFileList.size()]);
+}
+
+// CarbonFile[] deleteDeltaFiles =

Review comment:
   delete these 2 lines

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1246,8 +1209,22 @@ public static boolean isHorizontalCompactionEnabled() {
 // set the update status.
 segmentUpdateStatusManager.setUpdateStatusDetails(segmentUpdateDetails);
 
-CarbonFile[] deleteDeltaFiles =
-segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg), 
blockName);
+// only when SegmentUpdateDetails contain the specified block
+// will the method getDeleteDeltaFilesList be executed
+List blockNameList = 
segmentUpdateStatusManager.getBlockNameFromSegment(seg);
+Map> blockAndDeleteDeltaFilesMap = new 
HashMap<>();
+CarbonFile[] deleteDeltaFiles = null;

Review comment:
   = new CarbonFile[0]

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1246,8 +1209,22 @@ public static boolean isHorizontalCompactionEnabled() {
 // set the update status.
 segmentUpdateStatusManager.setUpdateStatusDetails(segmentUpdateDetails);
 
-CarbonFile[] deleteDeltaFiles =
-segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg), 
blockName);
+// only when SegmentUpdateDetails contain the specified block
+// will the method getDeleteDeltaFilesList be executed
+List blockNameList = 
segmentUpdateStatusManager.getBlockNameFromSegment(seg);
+Map> blockAndDeleteDeltaFilesMap = new 
HashMap<>();
+CarbonFile[] deleteDeltaFiles = null;
+if (blockNameList.contains(blockName)) {

Review comment:
   if (blockNameList.contains(blockName)) { 
 blockAndDeleteDeltaFilesMap =
 if (blockAndDeleteDeltaFilesMap.containsKey(blockName)) {
 }
   }

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1246,8 +1209,22 @@ public static boolean isHorizontalCompactionEnabled() {
 // set the update status.
 segmentUpdateStatusManager.setUpdateStatusDetails(segmentUpdateDetails);
 
-CarbonFile[] deleteDeltaFiles =
-segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg), 
blockName);
+// only when SegmentUpdateDetails contain the specified block
+// will the method getDeleteDeltaFilesList be executed
+List blockNameList = 
segmentUpdateStatusManager.getBlockNameFromSegment(seg);
+Map> blockAndDeleteDeltaFilesMap = new 
HashMap<>();
+CarbonFile[] deleteDeltaFiles = null;
+if (blockNameList.contains(blockName)) {
+  blockAndDeleteDeltaFilesMap =
+  segmentUpdateStatusManager.getDeleteDeltaFilesList(new Segment(seg));
+}
+if (blockAndDeleteDeltaFilesMap.containsKey(blockName)) {

Review comment:
   no need to covert to array





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [carbondata] marchpure commented on a change in pull request #3986: [CARBONDATA-4034] Improve the time-consuming of Horizontal Compaction for update

2020-10-15 Thread GitBox


marchpure commented on a change in pull request #3986:
URL: https://github.com/apache/carbondata/pull/3986#discussion_r505670979



##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean 
checkDeleteDeltaFilesInSeg(Segment seg,
 return blockLists;
   }
 
+  private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+  SegmentUpdateStatusManager segmentUpdateStatusManager, int 
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+  segmentUpdateStatusManager.getDeleteDeltaFilesForSegment(seg);
+
+List blockNameList =
+  segmentUpdateStatusManager.getBlockNameFromSegment(seg.getSegmentNo());
+
+Set uniqueBlocks = new HashSet();
+for (final String blockName : blockNameList) {
+
+  List deleteDeltaFiles = 
blockAndDeleteDeltaFilesMap.get(blockName);
+
+  if (null != deleteDeltaFiles) {
+for (CarbonFile blocks : deleteDeltaFiles) {

Review comment:
   if (delteDeltaFiles.size < threshold) continue

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1039,22 +1039,10 @@ private static boolean 
isSegmentValid(LoadMetadataDetails seg) {
 if (CompactionType.IUD_DELETE_DELTA == compactionTypeIUD) {
   int numberDeleteDeltaFilesThreshold =
   
CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
-  List deleteSegments = new ArrayList<>();
   for (Segment seg : segments) {
-if (checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager,

Review comment:
   remove checkDeleteDeltaFilesInSeg function

##
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
 return null;
   }
 
+  public Map> getDeleteDeltaFilesForSegment(final 
Segment seg) {

Review comment:
   remove getDeleteDeltaFilesList function

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean 
checkDeleteDeltaFilesInSeg(Segment seg,
 return blockLists;
   }
 
+  private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+  SegmentUpdateStatusManager segmentUpdateStatusManager, int 
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+  segmentUpdateStatusManager.getDeleteDeltaFilesForSegment(seg);
+
+List blockNameList =
+  segmentUpdateStatusManager.getBlockNameFromSegment(seg.getSegmentNo());
+
+Set uniqueBlocks = new HashSet();
+for (final String blockName : blockNameList) {
+
+  List deleteDeltaFiles = 
blockAndDeleteDeltaFilesMap.get(blockName);
+
+  if (null != deleteDeltaFiles) {
+for (CarbonFile blocks : deleteDeltaFiles) {

Review comment:
   blocks -> block

##
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
 return null;
   }
 
+  public Map> getDeleteDeltaFilesForSegment(final 
Segment seg) {
+String segmentPath = CarbonTablePath.getSegmentPath(
+  identifier.getTablePath(), seg.getSegmentNo());
+CarbonFile segDir = FileFactory.getCarbonFile(segmentPath);
+CarbonFile[] allDeleteDeltaFilesOfSegment = segDir.listFiles(new 
CarbonFileFilter() {
+  @Override
+  public boolean accept(CarbonFile pathName) {
+String fileName = pathName.getName();
+return (pathName.getSize() > 0) &&

Review comment:
   getSize() will trigger one S3 IO.
   remove getsSize()

##
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
##
@@ -455,6 +455,51 @@ public boolean accept(CarbonFile pathName) {
 return null;
   }
 
+  public Map> getDeleteDeltaFilesForSegment(final 
Segment seg) {
+String segmentPath = CarbonTablePath.getSegmentPath(
+  identifier.getTablePath(), seg.getSegmentNo());

Review comment:
   if SegmentUpdateDetails  donot contains seg, we shall return empty 
result directly.
   which can save a lot of IO overhead

##
File path: 
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
##
@@ -1210,6 +1198,39 @@ private static boolean 
checkDeleteDeltaFilesInSeg(Segment seg,
 return blockLists;
   }
 
+  private static List checkAndGetDeleteDeltaFilesInSeg(Segment seg,
+  SegmentUpdateStatusManager segmentUpdateStatusManager, int 
numberDeltaFilesThreshold) {
+
+List blockLists = new ArrayList<>();
+
+Map> blockAndDeleteDeltaFilesMap =
+