This is an automated email from the ASF dual-hosted git repository.

amatya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 67720b60ae6 Skip compaction for intervals without data (#15676)
67720b60ae6 is described below

commit 67720b60ae664aab95e5482086e0b4f2a77be500
Author: AmatyaAvadhanula <[email protected]>
AuthorDate: Tue Jan 16 12:31:36 2024 +0530

    Skip compaction for intervals without data (#15676)
    
    * Skip compaction for intervals with a single tombstone and no data
---
 .../compact/NewestSegmentFirstIterator.java        |   6 ++
 .../compact/NewestSegmentFirstPolicyTest.java      | 113 +++++++++++++++++++++
 2 files changed, 119 insertions(+)

diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
 
b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
index 05bae7d2386..c4ae771f808 100644
--- 
a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
+++ 
b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java
@@ -339,6 +339,12 @@ public class NewestSegmentFirstIterator implements 
CompactionSegmentIterator
     while (compactibleSegmentIterator.hasNext()) {
       List<DataSegment> segments = compactibleSegmentIterator.next();
 
+      // Do not compact an interval which comprises of a single tombstone
+      // If there are multiple tombstones in the interval, we may still want 
to compact them
+      if (segments.size() == 1 && segments.get(0).isTombstone()) {
+        continue;
+      }
+
       final SegmentsToCompact candidates = SegmentsToCompact.from(segments);
       final Interval interval = candidates.getUmbrellaInterval();
 
diff --git 
a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
 
b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
index 553445d9f73..dda1cb1af13 100644
--- 
a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
+++ 
b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java
@@ -57,6 +57,7 @@ import org.apache.druid.timeline.Partitions;
 import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.apache.druid.timeline.partition.ShardSpec;
+import org.apache.druid.timeline.partition.TombstoneShardSpec;
 import org.apache.druid.utils.Streams;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Interval;
@@ -1764,6 +1765,118 @@ public class NewestSegmentFirstPolicyTest
     Assert.assertFalse(iterator.hasNext());
   }
 
+  @Test
+  public void testSkipCompactionForIntervalsContainingSingleTombstone()
+  {
+    final DataSegment tombstone2023 = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2023/2024"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        TombstoneShardSpec.INSTANCE,
+        0,
+        1);
+    final DataSegment dataSegment2023 = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2023/2024"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        new NumberedShardSpec(1, 0),
+        0,
+        100);
+    final DataSegment tombstone2024 = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2024/2025"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        TombstoneShardSpec.INSTANCE,
+        0,
+        1);
+
+    CompactionSegmentIterator iterator = policy.reset(
+        ImmutableMap.of(DATA_SOURCE,
+                        createCompactionConfig(10000,
+                                               new Period("P0D"),
+                                               new 
UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
+                        )
+        ),
+        ImmutableMap.of(
+            DATA_SOURCE,
+            SegmentTimeline.forSegments(ImmutableSet.of(tombstone2023, 
dataSegment2023, tombstone2024))
+        ),
+        Collections.emptyMap()
+    );
+
+    // Skips 2024/2025 since it has a single tombstone and no data.
+    // Return all segments in 2023/2024 since at least one of them has data 
despite there being a tombstone.
+    Assert.assertEquals(
+        ImmutableList.of(tombstone2023, dataSegment2023),
+        iterator.next().getSegments()
+    );
+
+    final DataSegment tombstone2025Jan = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2025-01-01/2025-02-01"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        TombstoneShardSpec.INSTANCE,
+        0,
+        1);
+    final DataSegment tombstone2025Feb = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2025-02-01/2025-03-01"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        TombstoneShardSpec.INSTANCE,
+        0,
+        1);
+    final DataSegment tombstone2025Mar = new DataSegment(
+        DATA_SOURCE,
+        Intervals.of("2025-03-01/2025-04-01"),
+        "0",
+        new HashMap<>(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        TombstoneShardSpec.INSTANCE,
+        0,
+        1);
+    iterator = policy.reset(
+        ImmutableMap.of(DATA_SOURCE,
+                        createCompactionConfig(10000,
+                                               new Period("P0D"),
+                                               new 
UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
+                        )
+        ),
+        ImmutableMap.of(
+            DATA_SOURCE,
+            SegmentTimeline.forSegments(ImmutableSet.of(
+                tombstone2023,
+                dataSegment2023,
+                tombstone2024,
+                tombstone2025Jan,
+                tombstone2025Feb,
+                tombstone2025Mar
+            ))
+        ),
+        Collections.emptyMap()
+    );
+    // Does not skip the tombstones in 2025 since there are multiple of them 
which could potentially be condensed to one
+    Assert.assertEquals(
+        ImmutableList.of(tombstone2025Jan, tombstone2025Feb, tombstone2025Mar),
+        iterator.next().getSegments()
+    );
+  }
+
   private static void assertCompactSegmentIntervals(
       CompactionSegmentIterator iterator,
       Period segmentPeriod,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to