This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 59b698dd340 Bulk add to timeline in
DataSourceCompactibleSegmentIterator. (#18774)
59b698dd340 is described below
commit 59b698dd3403ac922e1dfa7899b75df76f4523ee
Author: Gian Merlino <[email protected]>
AuthorDate: Sun Nov 23 06:59:20 2025 -0800
Bulk add to timeline in DataSourceCompactibleSegmentIterator. (#18774)
Calling "addAll" is more efficient than calling "add" in a loop;
it is O(N) instead of O(N^2).
---
.../druid/timeline/VersionedIntervalTimeline.java | 12 ++++++--
.../DataSourceCompactibleSegmentIterator.java | 34 +++++++++++++++-------
2 files changed, 33 insertions(+), 13 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
index 586f445fc0f..c3bd293e433 100644
---
a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
+++
b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
@@ -176,14 +176,20 @@ public class VersionedIntervalTimeline<VersionType,
ObjectType extends Overshado
.toSet();
}
+ /**
+ * Add a single partition chunk entry to this timeline. Avoid calling this
in a loop, since it
+ * is O(objects in holder) and can therefore create O(N^2) situations.
Instead use {@link #addAll(Iterator)}
+ * if you have many objects to add.
+ */
public void add(final Interval interval, VersionType version,
PartitionChunk<ObjectType> object)
{
addAll(Iterators.singletonIterator(new PartitionChunkEntry<>(interval,
version, object)));
}
- public void addAll(
- final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects
- )
+ /**
+ * Adds partition chunk entries to this timeline.
+ */
+ public void addAll(final Iterator<PartitionChunkEntry<VersionType,
ObjectType>> objects)
{
lock.writeLock().lock();
diff --git
a/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
b/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
index 40867eb5c0c..9b87b0409f6 100644
---
a/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
+++
b/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
@@ -20,7 +20,9 @@
package org.apache.druid.server.compaction;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
@@ -33,6 +35,7 @@ import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Partitions;
import org.apache.druid.timeline.SegmentTimeline;
import org.apache.druid.timeline.TimelineObjectHolder;
+import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.NumberedPartitionChunk;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.PartitionChunk;
@@ -137,18 +140,29 @@ public class DataSourceCompactibleSegmentIterator
implements CompactionSegmentIt
final String temporaryVersion = DateTimes.nowUtc().toString();
for (Map.Entry<Interval, Set<DataSegment>> partitionsPerInterval :
intervalToPartitionMap.entrySet()) {
Interval interval = partitionsPerInterval.getKey();
- int partitionNum = 0;
Set<DataSegment> segmentSet = partitionsPerInterval.getValue();
int partitions = segmentSet.size();
- for (DataSegment segment : segmentSet) {
- DataSegment segmentsForCompact = segment.withShardSpec(new
NumberedShardSpec(partitionNum, partitions));
- timelineWithConfiguredSegmentGranularity.add(
- interval,
- temporaryVersion,
- NumberedPartitionChunk.make(partitionNum, partitions,
segmentsForCompact)
- );
- partitionNum += 1;
- }
+ timelineWithConfiguredSegmentGranularity.addAll(
+ Iterators.transform(
+ segmentSet.iterator(),
+ new Function<>()
+ {
+ int partitionNum = 0;
+
+ @Override
+ public
VersionedIntervalTimeline.PartitionChunkEntry<String, DataSegment>
apply(DataSegment segment)
+ {
+ final DataSegment segmentForCompact =
+ segment.withShardSpec(new
NumberedShardSpec(partitionNum, partitions));
+ return new
VersionedIntervalTimeline.PartitionChunkEntry<>(
+ interval,
+ temporaryVersion,
+ NumberedPartitionChunk.make(partitionNum++,
partitions, segmentForCompact)
+ );
+ }
+ }
+ )
+ );
}
// PartitionHolder can only hold chunks of one partition space
// However, partitions in the new timeline
(timelineWithConfiguredSegmentGranularity) can hold multiple
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]