maytasm commented on a change in pull request #10371:
URL: https://github.com/apache/druid/pull/10371#discussion_r489208067



##########
File path: 
server/src/main/java/org/apache/druid/server/coordinator/duty/NewestSegmentFirstIterator.java
##########
@@ -336,25 +353,72 @@ private boolean 
needsCompaction(ClientCompactionTaskQueryTuningConfig tuningConf
    * @return segments to compact
    */
   private SegmentsToCompact findSegmentsToCompact(
+      final String dataSourceName,
       final CompactibleTimelineObjectHolderCursor 
compactibleTimelineObjectHolderCursor,
       final DataSourceCompactionConfig config
   )
   {
-    final long inputSegmentSize = config.getInputSegmentSizeBytes();
+    while (compactibleTimelineObjectHolderCursor.hasNext()) {
+      final SegmentsToCompact candidates = new 
SegmentsToCompact(compactibleTimelineObjectHolderCursor.next());
+      if (isSegmentsNeedCompact(candidates, config, true)) {
+        return candidates;
+      } else {
+        collectSegmentStatistics(processedSegments, dataSourceName, 
candidates);
+      }
+    }
+    log.info("All segments look good! Nothing to compact");
+    return new SegmentsToCompact();
+  }
 
+  /**
+   * Progressively iterates all remaining time intervals (latest first) in the
+   * timeline {@param compactibleTimelineObjectHolderCursor}. Note that the 
timeline lookup duration is one day.
+   * The logic for checking if the segments can be compacted or not is then 
perform on each iteration.
+   * This is repeated until no remaining time intervals in {@param 
compactibleTimelineObjectHolderCursor}.
+   */
+  private void iterateAllSegments(
+      final String dataSourceName,
+      final CompactibleTimelineObjectHolderCursor 
compactibleTimelineObjectHolderCursor,
+      final DataSourceCompactionConfig config
+  )
+  {
     while (compactibleTimelineObjectHolderCursor.hasNext()) {
       final SegmentsToCompact candidates = new 
SegmentsToCompact(compactibleTimelineObjectHolderCursor.next());
+      if (isSegmentsNeedCompact(candidates, config, false)) {
+        // Collect statistic for segments that need compaction
+        collectSegmentStatistics(remainingSegments, dataSourceName, 
candidates);
+      } else {
+        // Collect statistic for segments that does not need compaction
+        collectSegmentStatistics(processedSegments, dataSourceName, 
candidates);
+      }
+    }
+  }
 
-      if (!candidates.isEmpty()) {
-        final boolean isCompactibleSize = candidates.getTotalSize() <= 
inputSegmentSize;
-        final boolean needsCompaction = needsCompaction(
-            
ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), 
config.getMaxRowsPerSegment()),
-            candidates
-        );
+  /**
+   * This method encapsulates the logic for checking if a given {@param 
candidates} needs compaction or not.
+   * If {@param logCannotCompactReason} is true then the reason for {@param 
candidates} not needing compaction is
+   * logged (for the case that {@param candidates} does not needs compaction).
+   *
+   * @return true if the {@param candidates} needs compaction, false if the 
{@param candidates} does not needs compaction
+   */
+  private boolean isSegmentsNeedCompact(

Review comment:
       Done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to