maytasm commented on a change in pull request #10371:
URL: https://github.com/apache/druid/pull/10371#discussion_r489208067
##########
File path:
server/src/main/java/org/apache/druid/server/coordinator/duty/NewestSegmentFirstIterator.java
##########
@@ -336,25 +353,72 @@ private boolean
needsCompaction(ClientCompactionTaskQueryTuningConfig tuningConf
* @return segments to compact
*/
private SegmentsToCompact findSegmentsToCompact(
+ final String dataSourceName,
final CompactibleTimelineObjectHolderCursor
compactibleTimelineObjectHolderCursor,
final DataSourceCompactionConfig config
)
{
- final long inputSegmentSize = config.getInputSegmentSizeBytes();
+ while (compactibleTimelineObjectHolderCursor.hasNext()) {
+ final SegmentsToCompact candidates = new
SegmentsToCompact(compactibleTimelineObjectHolderCursor.next());
+ if (isSegmentsNeedCompact(candidates, config, true)) {
+ return candidates;
+ } else {
+ collectSegmentStatistics(processedSegments, dataSourceName,
candidates);
+ }
+ }
+ log.info("All segments look good! Nothing to compact");
+ return new SegmentsToCompact();
+ }
+ /**
+ * Progressively iterates all remaining time intervals (latest first) in the
+ * timeline {@param compactibleTimelineObjectHolderCursor}. Note that the
timeline lookup duration is one day.
+ * The logic for checking if the segments can be compacted or not is then
perform on each iteration.
+ * This is repeated until no remaining time intervals in {@param
compactibleTimelineObjectHolderCursor}.
+ */
+ private void iterateAllSegments(
+ final String dataSourceName,
+ final CompactibleTimelineObjectHolderCursor
compactibleTimelineObjectHolderCursor,
+ final DataSourceCompactionConfig config
+ )
+ {
while (compactibleTimelineObjectHolderCursor.hasNext()) {
final SegmentsToCompact candidates = new
SegmentsToCompact(compactibleTimelineObjectHolderCursor.next());
+ if (isSegmentsNeedCompact(candidates, config, false)) {
+ // Collect statistic for segments that need compaction
+ collectSegmentStatistics(remainingSegments, dataSourceName,
candidates);
+ } else {
+ // Collect statistic for segments that does not need compaction
+ collectSegmentStatistics(processedSegments, dataSourceName,
candidates);
+ }
+ }
+ }
- if (!candidates.isEmpty()) {
- final boolean isCompactibleSize = candidates.getTotalSize() <=
inputSegmentSize;
- final boolean needsCompaction = needsCompaction(
-
ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(),
config.getMaxRowsPerSegment()),
- candidates
- );
+ /**
+ * This method encapsulates the logic for checking if a given {@param
candidates} needs compaction or not.
+ * If {@param logCannotCompactReason} is true then the reason for {@param
candidates} not needing compaction is
+ * logged (for the case that {@param candidates} does not needs compaction).
+ *
+ * @return true if the {@param candidates} needs compaction, false if the
{@param candidates} does not needs compaction
+ */
+ private boolean isSegmentsNeedCompact(
Review comment:
Done
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]