Fly-Style commented on code in PR #18860:
URL: https://github.com/apache/druid/pull/18860#discussion_r2643102593


##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/autoscaler/CostBasedAutoScalerIntegrationTest.java:
##########
@@ -215,6 +223,64 @@ public void 
test_autoScaler_computesOptimalTaskCountAndProducesScaleUp()
     
cluster.callApi().postSupervisor(kafkaSupervisorSpec.createSuspendedSpec());
   }
 
+  @Test
+  @Timeout(300)
+  void test_scaleDownDuringTaskRollover()
+  {
+    final String superId = dataSource + "_super";
+    final int initialTaskCount = 10;
+
+    final CostBasedAutoScalerConfig autoScalerConfig = 
CostBasedAutoScalerConfig
+        .builder()
+        .enableTaskAutoScaler(true)
+        .taskCountMin(1)
+        .taskCountMax(10)
+        .taskCountStart(initialTaskCount)
+        .scaleActionPeriodMillis(2000)
+        .minTriggerScaleActionFrequencyMillis(2000)
+        // High idle weight ensures scale-down when tasks are mostly idle 
(little data to process)
+        .lagWeight(0.1)
+        .idleWeight(0.9)
+        .build();
+
+    final KafkaSupervisorSpec spec = 
createKafkaSupervisorWithAutoScaler(superId, autoScalerConfig, 
initialTaskCount);
+
+    // Submit the supervisor
+    Assertions.assertEquals(superId, cluster.callApi().postSupervisor(spec));
+
+    // Wait for at least one task running for the datasource managed by the 
supervisor.
+    overlord.latchableEmitter().waitForEvent(e -> 
e.hasMetricName("task/run/time")
+                                                   
.hasDimension(DruidMetrics.DATASOURCE, dataSource));
+
+    // Wait for autoscaler to emit metric indicating scale-down, it should be 
just less than the current task count.
+    overlord.latchableEmitter().waitForEvent(
+        event -> event.hasMetricName(OPTIMAL_TASK_COUNT_METRIC)
+                      .hasValueMatching(Matchers.lessThan((long) 
initialTaskCount)));
+
+    // Wait for tasks to complete (first rollover)
+    overlord.latchableEmitter().waitForEvent(e -> 
e.hasMetricName("task/action/success/count"));
+
+    // Wait for the task running for the datasource managed by a supervisor.
+    overlord.latchableEmitter().waitForEvent(e -> 
e.hasMetricName("task/run/time")
+                                                   
.hasDimension(DruidMetrics.DATASOURCE, dataSource));
+
+    // After rollover, verify that the running task count has decreased
+    // The autoscaler should have recommended fewer tasks due to high idle time
+    final int postRolloverRunningTasks = 
cluster.callApi().getTaskCount("running", dataSource);
+
+    Assertions.assertTrue(

Review Comment:
   Here it actually should not be skipped :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to