This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 9a06aeb063c7d22143e853bcd95a64efa9a8f7f8
Author: Alexey Serbin <[email protected]>
AuthorDate: Thu Dec 19 13:21:34 2024 -0800

    KUDU-3573 fix flakiness in TestNewOpsDontGetScheduledDuringUnregister
    
    Change-Id: Ie2b12be86f3a6ab52d8a0160b481e10b75054e15
    Reviewed-on: http://gerrit.cloudera.org:8080/22248
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Marton Greber <[email protected]>
    Reviewed-by: Gabriella Lotz <[email protected]>
---
 src/kudu/util/maintenance_manager-test.cc | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc
index df6f8373a..ca16374b3 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -446,15 +446,24 @@ TEST_F(MaintenanceManagerTest, TestRegisterUnregisterWithContention) {
 // Regression test for KUDU-1495: when an operation is being unregistered,
 // new instances of that operation should not be scheduled.
 TEST_F(MaintenanceManagerTest, TestNewOpsDontGetScheduledDuringUnregister) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  // Set the op to run up to 10 times, and each time it sleeps for some time.
+  // The sleep time is set quite high to avoid flakiness due to scheduler
+  // anomalies when running this scenario on busy machines, especially if the
+  // binaries are instrumented by ASAN/TSAN. When all the available maintenance
+  // worker threads are busy with running their first iteration of 'op1',
+  // if the main thread is scheduled off the CPU for a long time before
+  // unregistering the operation, the sleep time should provide enough margin
+  // for the main thread becoming active again and unregistering 'op1'.
   TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE);
   op1.set_perf_improvement(10);
-
-  // Set the op to run up to 10 times, and each time should sleep for a second.
   op1.set_remaining_runs(10);
-  op1.set_sleep_time(MonoDelta::FromSeconds(1));
+  op1.set_sleep_time(MonoDelta::FromSeconds(5));
   manager_->RegisterOp(&op1);
 
-  // Wait until two instances of the ops start running, since we have two MM threads.
+  // Wait until two instances of the ops start running, since we have two
+  // maintenance worker threads.
   ASSERT_EVENTUALLY([&]() {
     ASSERT_EQ(op1.RunningGauge()->value(), 2);
   });

Reply via email to