This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new cbb8db7  KUDU-3237 fix MaintenanceManagerTest.TestCompletedOpsHistory
cbb8db7 is described below

commit cbb8db7711785e85e7c74fe86b93d7c1ba9c9c24
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Jan 25 14:42:09 2021 -0800

    KUDU-3237 fix MaintenanceManagerTest.TestCompletedOpsHistory
    
    This patch fixes a flakiness in the TestCompletedOpsHistory scenario.
    The flakiness is a test-only issue which became apparent as a result of
    the recent changes introduced into the MaintenanceManager with 9e4664d44
    changelist.  In essence, with finer granularity of locking in the
    scoped cleanup of the MaintenanceManager::LaunchOp() method, the test
    thread calling MaintenanceManager::GetMaintenanceManagerStatusDump()
    has a slight chance of acquiring 'completed_ops_lock_' ahead of the
    thread executing the code in the LaunchOp()'s scoped cleanup.
    
    This patch wraps the related code into ASSERT_EVENTUALLY to resolve
    test-only race condition mentioned above.  I verified that this patch
    fixes the issue by running the test scenario multiple times under
    dist-test (RELEASE build).
    
    Before: 2 out of 256 runs failed
      http://dist-test.cloudera.org//job?job_id=aserbin.1611806979.74192
    
    After : 0 out of 256 runs failed
      http://dist-test.cloudera.org//job?job_id=aserbin.1611809676.95320
    
    Change-Id: I760287b3ed4d50e32d2f9257e5390fdf8fa8f288
    Reviewed-on: http://gerrit.cloudera.org:8080/16991
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/util/maintenance_manager-test.cc | 34 +++++++++++++++++++------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/kudu/util/maintenance_manager-test.cc 
b/src/kudu/util/maintenance_manager-test.cc
index 428e833..0fc5d80 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -566,29 +566,37 @@ TEST_F(MaintenanceManagerTest, TestRunningInstances) {
   ASSERT_EQ(status_pb.running_operations_size(), 0);
 }
 
-// Test adding operations and make sure that the history of recently completed 
operations
-// is correct in that it wraps around and doesn't grow.
+// Test adding operations and make sure that the history of recently completed
+// operations is correct in that it wraps around and doesn't grow.
 TEST_F(MaintenanceManagerTest, TestCompletedOpsHistory) {
   for (int i = 0; i < kHistorySize + 1; i++) {
-    string name = Substitute("op$0", i);
+    const auto name = Substitute("op$0", i);
     TestMaintenanceOp op(name, MaintenanceOp::HIGH_IO_USAGE);
     op.set_perf_improvement(1);
     op.set_ram_anchored(100);
     manager_->RegisterOp(&op);
 
     ASSERT_EVENTUALLY([&]() {
-        ASSERT_EQ(op.DurationHistogram()->TotalCount(), 1);
-      });
+      ASSERT_EQ(op.DurationHistogram()->TotalCount(), 1);
+    });
     manager_->UnregisterOp(&op);
 
-    MaintenanceManagerStatusPB status_pb;
-    manager_->GetMaintenanceManagerStatusDump(&status_pb);
-    // The size should equal to the current completed OP size,
-    // and should be at most the kHistorySize.
-    ASSERT_EQ(std::min(kHistorySize, i + 1), 
status_pb.completed_operations_size());
-    // The most recently completed op should always be first, even if we wrap
-    // around.
-    ASSERT_EQ(name, status_pb.completed_operations(0).name());
+    // There might be a race between updating the internal container with the
+    // operations completed so far and serving the request to the embedded
+    // web server: the latter might acquire the lock guarding the container
+    // first and output the information before the operation marked 'complete'.
+    // ASSERT_EVENTUALLY() addresses the transient condition.
+    ASSERT_EVENTUALLY([&]() {
+      MaintenanceManagerStatusPB status_pb;
+      manager_->GetMaintenanceManagerStatusDump(&status_pb);
+      // The size should equal to the current completed OP size,
+      // and should be at most the kHistorySize.
+      ASSERT_EQ(std::min(kHistorySize, i + 1),
+                status_pb.completed_operations_size());
+      // The most recently completed op should always be first, even if we wrap
+      // around.
+      ASSERT_EQ(name, status_pb.completed_operations(0).name());
+    });
   }
 }
 

Reply via email to