This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 8120b129e [tests] fix MaintenanceManagerTest.TestRunningInstances
8120b129e is described below
commit 8120b129e3f03b007c0d4ccc958eadd7f1250afc
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Feb 7 21:34:38 2025 -0800
[tests] fix MaintenanceManagerTest.TestRunningInstances
Changelist [1] introduced an extra assertion that started triggering
from time to time [2]. In addition, since
MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't
clear its output parameter, ASSERT_EVENTUALLY() would accumulate
information on running and completed operations when retrying [3].
This is a follow-up to [1] to address those issues.
[1] https://github.com/apache/kudu/commit/026516a70
[2]
http://dist-test.cloudera.org:8080/diagnose?key=f8d3ec0a-e597-11ef-b2cd-42010af00016
[3]
http://dist-test.cloudera.org:8080/diagnose?key=026e8330-e596-11ef-a3c0-42010af00016
Change-Id: I3cd0ebc96582109b91f1a8e85d66cca7131750ae
Reviewed-on: http://gerrit.cloudera.org:8080/22467
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Abhishek Chennaka <[email protected]>
---
src/kudu/util/maintenance_manager-test.cc | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/src/kudu/util/maintenance_manager-test.cc
b/src/kudu/util/maintenance_manager-test.cc
index a00a7d61a..f29f19def 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -631,15 +631,19 @@ TEST_F(MaintenanceManagerTest, RunningInstances) {
MaintenanceManagerStatusPB status_pb;
std::array<const MaintenanceManagerStatusPB_OpInstancePB*, 2> instances{
nullptr, nullptr };
ASSERT_EVENTUALLY([&]() {
- // Due to scheduler anomalies and other uncontrollable factors, the main
- // thread might be put off CPU for some time, so at this point one or even
- // two operations may be completed already.
+ // Clear 'status_pb' if ASSERT_EVENTUALLY() retries:
+ // MaintenanceManager::GetMaintenanceManagerStatusDump() doesn't clear
+ // its output parameter, adding up more data instead.
+ status_pb.Clear();
manager_->GetMaintenanceManagerStatusDump(&status_pb);
const auto num_running = status_pb.running_operations_size();
ASSERT_GE(num_running, 0);
const auto num_completed = status_pb.completed_operations_size();
ASSERT_GE(num_completed, 0);
+ // Due to scheduler anomalies and other uncontrollable factors, the main
+ // thread might be put off CPU for some time, so one or even two operations
+ // may be completed already.
ASSERT_EQ(2, num_running + num_completed);
if (num_running == 2) {
instances[0] = &status_pb.running_operations(0);
@@ -662,13 +666,28 @@ TEST_F(MaintenanceManagerTest, RunningInstances) {
// to complete.
unregistrant.run();
- // Check that running instances are removed from collection after completion.
+ // Check that running instances aren't present in the collection
+ // when the ops are no longer running.
{
MaintenanceManagerStatusPB status_pb;
manager_->GetMaintenanceManagerStatusDump(&status_pb);
ASSERT_EQ(0, status_pb.running_operations_size());
- ASSERT_EQ(2, status_pb.completed_operations_size());
}
+
+ // The information on the completed tasks appears after waiting for each
+ // of them to wrap up and waking up the scheduler. So, there might be a race
+ // between this test thread which calls GetMaintenanceManagerStatusDump()
+ // and the MM worker threads that update the container with the information
+ // on the completed tasks. The ASSERT_EVENTUALLY() macro below helps
+ // to avoid flakiness in case of scheduler anomalies or when running this
+ // test scenario on a busy machine.
+ ASSERT_EVENTUALLY([&]() {
+ MaintenanceManagerStatusPB status_pb;
+ manager_->GetMaintenanceManagerStatusDump(&status_pb);
+ ASSERT_EQ(2, status_pb.completed_operations_size());
+ ASSERT_NE(status_pb.completed_operations(0).thread_id(),
+ status_pb.completed_operations(1).thread_id());
+ });
}
// Test adding operations and make sure that the history of recently completed