This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 59b698dd340 Bulk add to timeline in
DataSourceCompactibleSegmentIterator. (#18774)
59b698dd340 is described below
commit 59b698dd3403ac922e1dfa7899b75df76f4523ee
Author: Gian Merlino <[email protected]>
AuthorDate: Sun Nov 23 06:59:20 2025 -0800
Bulk add to timeline in DataSourceCompactibleSegmentIterator. (#18774)
Calling "addAll" is more efficient than calling "add" in a loop;
it is O(N) instead of O(N^2).
---
.../druid/timeline/VersionedIntervalTimeline.java | 12 ++++++--
.../DataSourceCompactibleSegmentIterator.java | 34 +++++++++++++++-------
2 files changed, 33 insertions(+), 13 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
index 586f445fc0f..c3bd293e433 100644
---
a/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
+++
b/processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java
@@ -176,14 +176,20 @@ public class VersionedIntervalTimeline<VersionType,
ObjectType extends Overshado
.toSet();
}
+ /**
+ * Add a single partition chunk entry to this timeline. Avoid calling this
in a loop, since it
+ * is O(objects in holder) and can therefore create O(N^2) situations.
Instead use {@link #addAll(Iterator)}
+ * if you have many objects to add.
+ */
public void add(final Interval interval, VersionType version,
PartitionChunk<ObjectType> object)
{
addAll(Iterators.singletonIterator(new PartitionChunkEntry<>(interval,
version, object)));
}
- public void addAll(
- final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects
- )
+ /**
+ * Adds partition chunk entries to this timeline.
+ */
+ public void addAll(final Iterator<PartitionChunkEntry<VersionType,
ObjectType>> objects)
{
lock.writeLock().lock();
diff --git
a/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
b/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
index 40867eb5c0c..9b87b0409f6 100644
---
a/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
+++
b/server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java
@@ -20,7 +20,9 @@
package org.apache.druid.server.compaction;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
@@ -33,6 +35,7 @@ import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Partitions;
import org.apache.druid.timeline.SegmentTimeline;
import org.apache.druid.timeline.TimelineObjectHolder;
+import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.NumberedPartitionChunk;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.PartitionChunk;
@@ -137,18 +140,29 @@ public class DataSourceCompactibleSegmentIterator
implements CompactionSegmentIt
final String temporaryVersion = DateTimes.nowUtc().toString();
for (Map.Entry<Interval, Set<DataSegment>> partitionsPerInterval :
intervalToPartitionMap.entrySet()) {
Interval interval = partitionsPerInterval.getKey();
- int partitionNum = 0;
Set<DataSegment> segmentSet = partitionsPerInterval.getValue();
int partitions = segmentSet.size();
- for (DataSegment segment : segmentSet) {
- DataSegment segmentsForCompact = segment.withShardSpec(new
NumberedShardSpec(partitionNum, partitions));
- timelineWithConfiguredSegmentGranularity.add(
- interval,
- temporaryVersion,
- NumberedPartitionChunk.make(partitionNum, partitions,
segmentsForCompact)
- );
- partitionNum += 1;
- }
+ timelineWithConfiguredSegmentGranularity.addAll(
+ Iterators.transform(
+ segmentSet.iterator(),
+ new Function<>()
+ {
+ int partitionNum = 0;
+
+ @Override
+ public
VersionedIntervalTimeline.PartitionChunkEntry<String, DataSegment>
apply(DataSegment segment)
+ {
+ final DataSegment segmentForCompact =
+ segment.withShardSpec(new
NumberedShardSpec(partitionNum, partitions));
+ return new
VersionedIntervalTimeline.PartitionChunkEntry<>(
+ interval,
+ temporaryVersion,
+ NumberedPartitionChunk.make(partitionNum++,
partitions, segmentForCompact)
+ );
+ }
+ }
+ )
+ );
}
// PartitionHolder can only hold chunks of one partition space
// However, partitions in the new timeline
(timelineWithConfiguredSegmentGranularity) can hold multiple
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]