cecemei commented on code in PR #19059:
URL: https://github.com/apache/druid/pull/19059#discussion_r2907282965
##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java:
##########
@@ -206,6 +231,129 @@ public void test_ingestDayGranularity_andCompactToMonthGranularity_andCompactToY
     verifyCompactedSegmentsHaveFingerprints(yearGranConfig);
   }
+  @MethodSource("getPartitionsSpec")
+  @ParameterizedTest(name = "partitionsSpec={0}")
+  public void test_minorCompactionWithMSQ(PartitionsSpec partitionsSpec) throws Exception
+  {
+    configureCompaction(
+        CompactionEngine.MSQ,
+        new MostFragmentedIntervalFirstPolicy(2, new HumanReadableBytes("1KiB"), null, 80, null)
+    );
+    KafkaSupervisorSpecBuilder kafkaSupervisorSpecBuilder = MoreResources.Supervisor.KAFKA_JSON
+        .get()
+        .withDataSchema(schema -> schema.withTimestamp(new TimestampSpec("timestamp", "iso", null))
+                                        .withDimensions(DimensionsSpec.builder().useSchemaDiscovery(true).build()))
+        .withTuningConfig(tuningConfig -> tuningConfig.withMaxRowsPerSegment(1))
+        .withIoConfig(ioConfig -> ioConfig.withConsumerProperties(kafkaServer.consumerProperties()).withTaskCount(2));
+
+    // Set up first topic and supervisor
+    final String topic1 = IdUtils.getRandomId();
+    kafkaServer.createTopicWithPartitions(topic1, 1);
+    final KafkaSupervisorSpec supervisor1 = kafkaSupervisorSpecBuilder.withId(topic1).build(dataSource, topic1);
+    cluster.callApi().postSupervisor(supervisor1);
+
+    final int totalRowCount = publish1kRecords(topic1, true) + publish1kRecords(topic1, false);
+    waitUntilPublishedRecordsAreIngested(totalRowCount);
+
+    // Before compaction
+    Assertions.assertEquals(4, getNumSegmentsWith(Granularities.HOUR));
+
+    // Create a compaction config with DAY granularity
+    InlineSchemaDataSourceCompactionConfig dayGranularityConfig =
+        InlineSchemaDataSourceCompactionConfig
+            .builder()
+            .forDataSource(dataSource)
+            .withSkipOffsetFromLatest(Period.seconds(0))
+            .withGranularitySpec(new UserCompactionTaskGranularityConfig(Granularities.DAY, null, false))
+            .withDimensionsSpec(new UserCompactionTaskDimensionsConfig(
+                WikipediaStreamEventStreamGenerator.dimensions()
+                                                   .stream()
+                                                   .map(StringDimensionSchema::new)
+                                                   .collect(Collectors.toUnmodifiableList())))
+            .withTaskContext(Map.of("useConcurrentLocks", true))
+            .withIoConfig(new UserCompactionTaskIOConfig(true))
+            .withTuningConfig(UserCompactionTaskQueryTuningConfig.builder().partitionsSpec(partitionsSpec).build())
+            .build();
+
+    runCompactionWithSpec(dayGranularityConfig);
+    waitForAllCompactionTasksToFinish();
+
+    pauseCompaction(dayGranularityConfig);
+    Assertions.assertEquals(0, getNumSegmentsWith(Granularities.HOUR));
+    Assertions.assertEquals(1, getNumSegmentsWith(Granularities.DAY));
+    Assertions.assertEquals("2000", cluster.runSql("SELECT COUNT(*) FROM %s", dataSource));
+
+    verifyCompactedSegmentsHaveFingerprints(dayGranularityConfig);
+
+    // published another 1k
+    final int appendedRowCount = publish1kRecords(topic1, true);
+    indexer.latchableEmitter().flush();
+    waitUntilPublishedRecordsAreIngested(appendedRowCount);
+
+    // Tear down both topics and supervisors
+    kafkaServer.deleteTopic(topic1);
+    cluster.callApi().postSupervisor(supervisor1.createSuspendedSpec());
+
+    long totalUsed = overlord.latchableEmitter().getMetricValues(
Review Comment:
The last value (not the aggregate) should be the number of used segments, right?
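To illustrate the distinction being raised: if the used-segment-count metric behaves like a gauge (each emission reports the current number of used segments), summing emissions over-counts, while the most recent emission gives the current state. The sketch below is standalone and hypothetical; it does not use the test module's LatchableEmitter API, and the values and class name are made up for illustration only.

```java
import java.util.List;

public class LastValueVsAggregateSketch
{
  public static void main(String[] args)
  {
    // Hypothetical successive emissions of a "used segments" gauge:
    // 4 hourly segments before compaction, then 1 DAY segment, then 2 after the append.
    List<Long> emittedUsedSegmentCounts = List.of(4L, 1L, 2L);

    // Aggregating a gauge double-counts earlier states.
    long aggregated = emittedUsedSegmentCounts.stream().mapToLong(Long::longValue).sum();

    // The last emitted value reflects the current number of used segments.
    long lastValue = emittedUsedSegmentCounts.get(emittedUsedSegmentCounts.size() - 1);

    System.out.println("aggregate  = " + aggregated + " (over-counts)");
    System.out.println("last value = " + lastValue + " (current used-segment count)");
  }
}
```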