This is an automated email from the ASF dual-hosted git repository.
amatya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 67720b60ae6 Skip compaction for intervals without data (#15676)
67720b60ae6 is described below
commit 67720b60ae664aab95e5482086e0b4f2a77be500
Author: AmatyaAvadhanula <[email protected]>
AuthorDate: Tue Jan 16 12:31:36 2024 +0530
Skip compaction for intervals without data (#15676)
* Skip compaction for intervals with a single tombstone and no data
---
.../compact/NewestSegmentFirstIterator.java | 6 ++
.../compact/NewestSegmentFirstPolicyTest.java | 113 +++++++++++++++++++++
2 files changed, 119 insertions(+)
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
index 05bae7d2386..c4ae771f808 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
@@ -339,6 +339,12 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
while (compactibleSegmentIterator.hasNext()) {
List<DataSegment> segments = compactibleSegmentIterator.next();
+ // Do not compact an interval which comprises a single tombstone
+ // If there are multiple tombstones in the interval, we may still want to compact them
+ if (segments.size() == 1 && segments.get(0).isTombstone()) {
+ continue;
+ }
+
final SegmentsToCompact candidates = SegmentsToCompact.from(segments);
final Interval interval = candidates.getUmbrellaInterval();
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
index 553445d9f73..dda1cb1af13 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
@@ -57,6 +57,7 @@ import org.apache.druid.timeline.Partitions;
import org.apache.druid.timeline.SegmentTimeline;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;
+import org.apache.druid.timeline.partition.TombstoneShardSpec;
import org.apache.druid.utils.Streams;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
@@ -1764,6 +1765,118 @@ public class NewestSegmentFirstPolicyTest
Assert.assertFalse(iterator.hasNext());
}
+ @Test
+ public void testSkipCompactionForIntervalsContainingSingleTombstone()
+ {
+ final DataSegment tombstone2023 = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2023/2024"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ TombstoneShardSpec.INSTANCE,
+ 0,
+ 1);
+ final DataSegment dataSegment2023 = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2023/2024"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ new NumberedShardSpec(1, 0),
+ 0,
+ 100);
+ final DataSegment tombstone2024 = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2024/2025"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ TombstoneShardSpec.INSTANCE,
+ 0,
+ 1);
+
+ CompactionSegmentIterator iterator = policy.reset(
+ ImmutableMap.of(DATA_SOURCE,
+ createCompactionConfig(10000,
+ new Period("P0D"),
+ new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
+ )
+ ),
+ ImmutableMap.of(
+ DATA_SOURCE,
+ SegmentTimeline.forSegments(ImmutableSet.of(tombstone2023, dataSegment2023, tombstone2024))
+ ),
+ Collections.emptyMap()
+ );
+
+ // Skips 2024/2025 since it has a single tombstone and no data.
+ // Return all segments in 2023/2024 since at least one of them has data despite there being a tombstone.
+ Assert.assertEquals(
+ ImmutableList.of(tombstone2023, dataSegment2023),
+ iterator.next().getSegments()
+ );
+
+ final DataSegment tombstone2025Jan = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2025-01-01/2025-02-01"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ TombstoneShardSpec.INSTANCE,
+ 0,
+ 1);
+ final DataSegment tombstone2025Feb = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2025-02-01/2025-03-01"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ TombstoneShardSpec.INSTANCE,
+ 0,
+ 1);
+ final DataSegment tombstone2025Mar = new DataSegment(
+ DATA_SOURCE,
+ Intervals.of("2025-03-01/2025-04-01"),
+ "0",
+ new HashMap<>(),
+ new ArrayList<>(),
+ new ArrayList<>(),
+ TombstoneShardSpec.INSTANCE,
+ 0,
+ 1);
+ iterator = policy.reset(
+ ImmutableMap.of(DATA_SOURCE,
+ createCompactionConfig(10000,
+ new Period("P0D"),
+ new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
+ )
+ ),
+ ImmutableMap.of(
+ DATA_SOURCE,
+ SegmentTimeline.forSegments(ImmutableSet.of(
+ tombstone2023,
+ dataSegment2023,
+ tombstone2024,
+ tombstone2025Jan,
+ tombstone2025Feb,
+ tombstone2025Mar
+ ))
+ ),
+ Collections.emptyMap()
+ );
+ // Does not skip the tombstones in 2025 since there are multiple of them which could potentially be condensed to one
+ Assert.assertEquals(
+ ImmutableList.of(tombstone2025Jan, tombstone2025Feb, tombstone2025Mar),
+ iterator.next().getSegments()
+ );
+ }
+
private static void assertCompactSegmentIntervals(
CompactionSegmentIterator iterator,
Period segmentPeriod,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]