This is an automated email from the ASF dual-hosted git repository. kfaraz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push: new cac8b9da8fa Restrict segment metadata kill query till maxInterval from last kill task time (#17770) cac8b9da8fa is described below commit cac8b9da8fa6f4fe061b5c171fe8ade3317d50e1 Author: Chetan Patidar <122344823+chetanpatida...@users.noreply.github.com> AuthorDate: Tue Mar 4 13:42:58 2025 +0530 Restrict segment metadata kill query till maxInterval from last kill task time (#17770) Changes --------- - Use `maxIntervalToKill` to determine search interval for killing unused segments. - If no segment has been killed for the datasource yet, use durationToRetain --- .../coordinator/duty/KillUnusedSegments.java | 18 ++++++-- .../coordinator/duty/KillUnusedSegmentsTest.java | 50 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java index 4538d39de95..cd1c1746912 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java @@ -273,9 +273,21 @@ public class KillUnusedSegments implements CoordinatorDuty ) { final DateTime minStartTime = datasourceToLastKillIntervalEnd.get(dataSource); - final DateTime maxEndTime = ignoreDurationToRetain - ? DateTimes.COMPARE_DATE_AS_STRING_MAX - : DateTimes.nowUtc().minus(durationToRetain); + + // Once the first segment from a datasource is killed, we have a valid minStartTime. + // Restricting the upper bound to scan segments metadata while running the kill task results in an efficient SQL query.
+ final DateTime maxEndTime; + if (ignoreDurationToRetain) { + maxEndTime = DateTimes.COMPARE_DATE_AS_STRING_MAX; + } else if (minStartTime == null) { + maxEndTime = DateTimes.nowUtc().minus(durationToRetain); + } else { + // If we have already killed a segment, limit the kill interval based on the minStartTime + maxEndTime = DateTimes.min( + DateTimes.nowUtc().minus(durationToRetain), + minStartTime.plus(maxIntervalToKill) + ); + } final List<Interval> unusedSegmentIntervals = limitToPeriod( segmentsMetadataManager.getUnusedSegmentIntervals( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 59df67e4b49..272ed1887ce 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -78,6 +78,7 @@ public class KillUnusedSegmentsTest private static final DateTime NOW = DateTimes.nowUtc(); private static final Interval YEAR_OLD = new Interval(Period.days(1), NOW.minusDays(365)); private static final Interval MONTH_OLD = new Interval(Period.days(1), NOW.minusDays(30)); + private static final Interval FIFTEEN_DAY_OLD = new Interval(Period.days(1), NOW.minusDays(15)); private static final Interval DAY_OLD = new Interval(Period.days(1), NOW.minusDays(1)); private static final Interval HOUR_OLD = new Interval(Period.days(1), NOW.minusHours(1)); private static final Interval NEXT_DAY = new Interval(Period.days(1), NOW.plusDays(1)); @@ -604,6 +605,55 @@ public class KillUnusedSegmentsTest validateLastKillStateAndReset(DS1, YEAR_OLD); } + @Test + public void testMaxIntervalToKillOverridesDurationToRetain() + { + configBuilder.withDurationToRetain(Period.hours(6).toStandardDuration()) + .withMaxIntervalToKill(Period.days(20)); + + initDuty(); + + createAndAddUnusedSegment(DS1, MONTH_OLD, VERSION, 
NOW.minusDays(29)); + CoordinatorRunStats newDatasourceStats = runDutyAndGetStats(); + + // For a new datasource, the duration to retain is used to determine kill interval + Assert.assertEquals(1, newDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY)); + validateLastKillStateAndReset(DS1, MONTH_OLD); + + // For a datasource where kill has already happened, maxIntervalToKill is used + // if it leads to a smaller kill interval than durationToRetain + createAndAddUnusedSegment(DS1, FIFTEEN_DAY_OLD, VERSION, NOW.minusDays(14)); + createAndAddUnusedSegment(DS1, DAY_OLD, VERSION, NOW.minusHours(2)); + CoordinatorRunStats oldDatasourceStats = runDutyAndGetStats(); + + Assert.assertEquals(2, oldDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY)); + validateLastKillStateAndReset(DS1, FIFTEEN_DAY_OLD); + } + + @Test + public void testDurationToRetainOverridesMaxIntervalToKill() + { + configBuilder.withDurationToRetain(Period.days(20).toStandardDuration()) + .withMaxIntervalToKill(Period.days(350)); + + initDuty(); + + createAndAddUnusedSegment(DS1, YEAR_OLD, VERSION, NOW.minusDays(29)); + CoordinatorRunStats newDatasourceStats = runDutyAndGetStats(); + + Assert.assertEquals(1, newDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY)); + validateLastKillStateAndReset(DS1, YEAR_OLD); + + // For a datasource where (now - durationToRetain) < (lastKillTime(year old segment) + maxInterval) + // Fifteen day old segment will be rejected + createAndAddUnusedSegment(DS1, MONTH_OLD, VERSION, NOW.minusDays(29)); + createAndAddUnusedSegment(DS1, FIFTEEN_DAY_OLD, VERSION, NOW.minusDays(14)); + CoordinatorRunStats oldDatasourceStats = runDutyAndGetStats(); + + Assert.assertEquals(2, oldDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY)); + validateLastKillStateAndReset(DS1, MONTH_OLD); + } + @Test public void testHigherMaxIntervalToKill() { --------------------------------------------------------------------- To 
unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org