This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.18.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 43df9ee3f02d61d7558e9a45d7a7335aac9576c8
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Feb 7 21:34:38 2025 -0800

    [tests] fix MaintenanceManagerTest.TestRunningInstances
    
    Changelist [1] introduced an extra assertion that started triggering
    from time to time [2].  In addition, since
    MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't
    clear its output parameter, ASSERT_EVENTUALLY() would accumulate
    information on running and completed operations when retrying [3].
    
    This is a follow-up to [1] to address those issues.
    
    [1] https://github.com/apache/kudu/commit/026516a70
    [2] 
http://dist-test.cloudera.org:8080/diagnose?key=f8d3ec0a-e597-11ef-b2cd-42010af00016
    [3] 
http://dist-test.cloudera.org:8080/diagnose?key=026e8330-e596-11ef-a3c0-42010af00016
    
    Change-Id: I3cd0ebc96582109b91f1a8e85d66cca7131750ae
    Reviewed-on: http://gerrit.cloudera.org:8080/22467
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Abhishek Chennaka <[email protected]>
    (cherry picked from commit 8120b129e3f03b007c0d4ccc958eadd7f1250afc)
    Reviewed-on: http://gerrit.cloudera.org:8080/22470
    Reviewed-by: Zoltan Martonka <[email protected]>
---
 src/kudu/util/maintenance_manager-test.cc | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/kudu/util/maintenance_manager-test.cc 
b/src/kudu/util/maintenance_manager-test.cc
index a00a7d61a..f29f19def 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -631,15 +631,19 @@ TEST_F(MaintenanceManagerTest, RunningInstances) {
   MaintenanceManagerStatusPB status_pb;
   std::array<const MaintenanceManagerStatusPB_OpInstancePB*, 2> instances{ 
nullptr, nullptr };
   ASSERT_EVENTUALLY([&]() {
-    // Due to scheduler anomalies and other uncontrollable factors, the main
-    // thread might be put off CPU for some time, so at this point one or even
-    // two operations may be completed already.
+    // Clear 'status_pb' if ASSERT_EVENTUALLY() retries:
+    // MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't clear
+    // its output parameter, adding up more data instead.
+    status_pb.Clear();
     manager_->GetMaintenanceManagerStatusDump(&status_pb);
     const auto num_running = status_pb.running_operations_size();
     ASSERT_GE(num_running, 0);
     const auto num_completed = status_pb.completed_operations_size();
     ASSERT_GE(num_completed, 0);
 
+    // Due to scheduler anomalies and other uncontrollable factors, the main
+    // thread might be put off CPU for some time, so one or even two operations
+    // may be completed already.
     ASSERT_EQ(2, num_running + num_completed);
     if (num_running == 2) {
       instances[0] = &status_pb.running_operations(0);
@@ -662,13 +666,28 @@ TEST_F(MaintenanceManagerTest, RunningInstances) {
   // to complete.
   unregistrant.run();
 
-  // Check that running instances are removed from collection after completion.
+  // Check that running instances aren't present in the collection
+  // when the ops are no longer running.
   {
     MaintenanceManagerStatusPB status_pb;
     manager_->GetMaintenanceManagerStatusDump(&status_pb);
     ASSERT_EQ(0, status_pb.running_operations_size());
-    ASSERT_EQ(2, status_pb.completed_operations_size());
   }
+
+  // The information on the completed tasks appears after waiting for each
+  // of them to wrap up and waking up the scheduler. So, there might be a race
+  // between this test thread which calls GetMaintenanceManagerStatusDump()
+  // and the MM worker threads that update the container with the information
+  // on the completed tasks. The ASSERT_EVENTUALLY() macro below helps
+  // to avoid flakiness in case of scheduler anomalies or when running this
+  // test scenario on a busy machine.
+  ASSERT_EVENTUALLY([&]() {
+    MaintenanceManagerStatusPB status_pb;
+    manager_->GetMaintenanceManagerStatusDump(&status_pb);
+    ASSERT_EQ(2, status_pb.completed_operations_size());
+    ASSERT_NE(status_pb.completed_operations(0).thread_id(),
+              status_pb.completed_operations(1).thread_id());
+  });
 }
 
 // Test adding operations and make sure that the history of recently completed

Reply via email to