This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.18.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 43df9ee3f02d61d7558e9a45d7a7335aac9576c8 Author: Alexey Serbin <[email protected]> AuthorDate: Fri Feb 7 21:34:38 2025 -0800 [tests] fix MaintenanceManagerTest.TestRunningInstances Changelist [1] introduced an extra assertion that started triggering from time to time [2]. In addition, since MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't clear its output parameter, ASSERT_EVENTUALLY() would accumulate information on running and completed operations when retrying [3]. This is a follow-up to [1] to address those issues. [1] https://github.com/apache/kudu/commit/026516a70 [2] http://dist-test.cloudera.org:8080/diagnose?key=f8d3ec0a-e597-11ef-b2cd-42010af00016 [3] http://dist-test.cloudera.org:8080/diagnose?key=026e8330-e596-11ef-a3c0-42010af00016 Change-Id: I3cd0ebc96582109b91f1a8e85d66cca7131750ae Reviewed-on: http://gerrit.cloudera.org:8080/22467 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Abhishek Chennaka <[email protected]> (cherry picked from commit 8120b129e3f03b007c0d4ccc958eadd7f1250afc) Reviewed-on: http://gerrit.cloudera.org:8080/22470 Reviewed-by: Zoltan Martonka <[email protected]> --- src/kudu/util/maintenance_manager-test.cc | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc index a00a7d61a..f29f19def 100644 --- a/src/kudu/util/maintenance_manager-test.cc +++ b/src/kudu/util/maintenance_manager-test.cc @@ -631,15 +631,19 @@ TEST_F(MaintenanceManagerTest, RunningInstances) { MaintenanceManagerStatusPB status_pb; std::array<const MaintenanceManagerStatusPB_OpInstancePB*, 2> instances{ nullptr, nullptr }; ASSERT_EVENTUALLY([&]() { - // Due to scheduler anomalies and other uncontrollable factors, the main - // thread might be put off CPU for some time, so at this point one or even - // two operations may be completed already. + // Clear 'status_pb' if ASSERT_EVENTUALLY() retries: + // MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't clear + // its output parameter, adding up more data instead. + status_pb.Clear(); manager_->GetMaintenanceManagerStatusDump(&status_pb); const auto num_running = status_pb.running_operations_size(); ASSERT_GE(num_running, 0); const auto num_completed = status_pb.completed_operations_size(); ASSERT_GE(num_completed, 0); + // Due to scheduler anomalies and other uncontrollable factors, the main + // thread might be put off CPU for some time, so one or even two operations + // may be completed already. ASSERT_EQ(2, num_running + num_completed); if (num_running == 2) { instances[0] = &status_pb.running_operations(0); @@ -662,13 +666,28 @@ TEST_F(MaintenanceManagerTest, RunningInstances) { // to complete. unregistrant.run(); - // Check that running instances are removed from collection after completion. + // Check that running instances aren't present in the collection + // when the ops are no longer running. { MaintenanceManagerStatusPB status_pb; manager_->GetMaintenanceManagerStatusDump(&status_pb); ASSERT_EQ(0, status_pb.running_operations_size()); - ASSERT_EQ(2, status_pb.completed_operations_size()); } + + // The information on the completed tasks appears after waiting for each + // of them to wrap up and waking up the scheduler. So, there might be a race + // between this test thread which calls GetMaintenanceManagerStatusDump() + // and the MM worker threads that update the container with the information + // on the completed tasks. The ASSERT_EVENTUALLY() macro below helps + // to avoid flakiness in case of scheduler anomalies or when running this + // test scenario on a busy machine. + ASSERT_EVENTUALLY([&]() { + MaintenanceManagerStatusPB status_pb; + manager_->GetMaintenanceManagerStatusDump(&status_pb); + ASSERT_EQ(2, status_pb.completed_operations_size()); + ASSERT_NE(status_pb.completed_operations(0).thread_id(), + status_pb.completed_operations(1).thread_id()); + }); } // Test adding operations and make sure that the history of recently completed
