loquisgon commented on code in PR #12392:
URL: https://github.com/apache/druid/pull/12392#discussion_r848951065


##########
indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java:
##########
@@ -1099,6 +1100,164 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterval
     
   }
 
+  @Test
+  public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws 
Exception
+  {
+    // This test fails with segment lock because of the bug reported in 
https://github.com/apache/druid/issues/10911.
+    if (lockGranularity == LockGranularity.SEGMENT) {
+      return;
+    }
+
+    // The following task creates (several, more than three, last time I 
checked, six) HOUR segments with intervals of
+    // - 2014-01-01T00:00:00/2014-01-01T01:00:00
+    // - 2014-01-01T01:00:00/2014-01-01T02:00:00
+    // - 2014-01-01T02:00:00/2014-01-01T03:00:00
+    // The six segments are:
+    // three rows in hour 00:
+    // 2014-01-01T00:00:00.000Z_2014-01-01T01:00:00.000Z with two rows
+    // 2014-01-01T00:00:00.000Z_2014-01-01T01:00:00.000Z_1 with one row
+    // three rows in hour 01:
+    // 2014-01-01T01:00:00.000Z_2014-01-01T02:00:00.000Z with two rows
+    // 2014-01-01T01:00:00.000Z_2014-01-01T02:00:00.000Z_1 with one row
+    // four rows in hour 02:
+    // 2014-01-01T02:00:00.000Z_2014-01-01T03:00:00.000Z with two rows
+    // 2014-01-01T02:00:00.000Z_2014-01-01T03:00:00.000Z_1 with two rows
+    // there are 10 rows total in data set
+
+    // maxRowsPerSegment is set to 2 inside the runIndexTask methods
+    Pair<TaskStatus, List<DataSegment>> result = runIndexTask();
+    Assert.assertEquals(6, result.rhs.size());
+
+    final Builder builder = new Builder(
+        DATA_SOURCE,
+        segmentCacheManagerFactory,
+        RETRY_POLICY_FACTORY
+    );
+
+    // Setup partial interval compaction:
+    // Change the granularity from HOUR to MINUTE through compaction for hour 
01, there are three rows in the compaction
+    // interval,
+    // all three in the same timestamp (see TEST_ROWS), this should generate 
one segment in same, first, minute
+    // (task will now use
+    // the default rows per segments since compaction's tuning config is null) 
and
+    // 59 tombstones to completely overshadow the existing hour 01 segment. 
Since the segments outside the
+    // compaction interval should remanin unchanged there should be a total of 
1 + (2 + 59) + 2 = 64 segments
+
+    // **** PARTIAL COMPACTION: hour -> minute ****
+    final Interval compactionPartialInterval = 
Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
+    final CompactionTask partialCompactionTask = builder
+        .segmentGranularity(Granularities.MINUTE)
+        // Set dropExisting to true
+        .inputSpec(new CompactionIntervalSpec(compactionPartialInterval, 
null), true)
+        .build();
+    final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = 
runTask(partialCompactionTask);
+    Assert.assertTrue(partialCompactionResult.lhs.isSuccess());
+
+    // Segments that did not belong in the compaction interval (hours 00 and 
02) are expected unchanged
+    // add 2 unchanged segments for hour 00:
+    final Set<DataSegment> expectedSegments = new HashSet<>();
+    expectedSegments.addAll(
+        getStorageCoordinator().retrieveUsedSegmentsForIntervals(
+            DATA_SOURCE,
+            
Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")),
+            Segments.ONLY_VISIBLE
+        )
+    );
+    // add 2 unchanged segments for hour 02:
+    expectedSegments.addAll(
+        getStorageCoordinator().retrieveUsedSegmentsForIntervals(
+            DATA_SOURCE,
+            
Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")),
+            Segments.ONLY_VISIBLE
+        )
+    );
+    expectedSegments.addAll(partialCompactionResult.rhs);
+    Assert.assertEquals(64, expectedSegments.size());
+
+    // New segments that were compacted are expected. However, old segments of 
the compacted interval should be
+    // overshadowed by the new tombstones (59) being created for all minutes 
other than 01:01
+    final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(
+        getStorageCoordinator().retrieveUsedSegmentsForIntervals(
+            DATA_SOURCE,
+            Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")),
+            Segments.ONLY_VISIBLE
+        )
+    );
+    Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);
+    final List<DataSegment> realSegmentsAfterPartialCompaction =
+        segmentsAfterPartialCompaction.stream()
+                                      .filter(s -> !s.isTombstone())
+                                      .collect(Collectors.toList());
+    final List<DataSegment> tombstonesAfterPartialCompaction =
+        segmentsAfterPartialCompaction.stream()
+                                      .filter(s -> s.isTombstone())
+                                      .collect(Collectors.toList());
+    Assert.assertEquals(59, tombstonesAfterPartialCompaction.size());
+    Assert.assertEquals(5, realSegmentsAfterPartialCompaction.size());
+    Assert.assertEquals(64, segmentsAfterPartialCompaction.size());
+
+    // Now setup compaction over an interval with only tombstones, keeping 
same, minute granularity
+    final CompactionTask compactionTaskOverOnlyTombstones = builder
+        .segmentGranularity(null)
+        // Set dropExisting to true
+        // last 59 minutes of our 01, should be all tombstones
+        .inputSpec(new 
CompactionIntervalSpec(Intervals.of("2014-01-01T01:01:00/2014-01-01T02:00:00"), 
null), true)
+        .build();
+
+    // **** Compaction over tombstones ****
+    final Pair<TaskStatus, List<DataSegment>> resultOverOnlyTombstones = 
runTask(compactionTaskOverOnlyTombstones);
+    Assert.assertTrue(resultOverOnlyTombstones.lhs.isSuccess());
+
+    // compaction should not fail but since it is over the same granularity it 
should leave
+    // the tombstones unchanged
+    Assert.assertEquals(59, resultOverOnlyTombstones.rhs.size());
+    resultOverOnlyTombstones.rhs.forEach(t -> 
Assert.assertTrue(t.isTombstone()));
+  }
+
+
+  @Test
+  public void testCompactDatasourceOverFullIntervalWithOnlyTombstones() throws 
Exception

Review Comment:
   Yes, removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to