This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 4547400  [tests] fix flakiness in TableLocationsCacheMultiMasterTest
4547400 is described below

commit 454740094d9bf8ffc8478faded2ff282a84f232f
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Nov 19 12:54:05 2021 -0800

    [tests] fix flakiness in TableLocationsCacheMultiMasterTest
    
    This patch addresses flakiness in the ResetCache scenario of the
    TableLocationsCacheMultiMasterTest test.  Results when running with the
    --stress_cpu_threads=16 flag:
    
    before: 2 out of 32 runs failed
      http://dist-test.cloudera.org/job?job_id=aserbin.1637351260.146691
    
    after : 0 out of 256 runs failed
      http://dist-test.cloudera.org/job?job_id=aserbin.1637355014.21180
    
    Change-Id: Idfef423b2d1f22a3d0646f720e0b81852fadbd3c
    Reviewed-on: http://gerrit.cloudera.org:8080/18044
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/integration-tests/table_locations-itest.cc | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/kudu/integration-tests/table_locations-itest.cc b/src/kudu/integration-tests/table_locations-itest.cc
index e96eab2..fed0882 100644
--- a/src/kudu/integration-tests/table_locations-itest.cc
+++ b/src/kudu/integration-tests/table_locations-itest.cc
@@ -1284,18 +1284,15 @@ TEST_F(TableLocationsCacheMultiMasterTest, ResetCache) {
 
   int leader_master_idx = -1;
   ASSERT_EVENTUALLY([&] {
-    // Induce a change in master leadership (maybe, even few of them, up to the
-    // number of masters in the cluster).
-    for (auto idx = 0; idx < cluster_->num_masters(); ++idx) {
-      ASSERT_OK(cluster_->master(idx)->Pause());
-      // Make one master to stop sending heartbeats, and give the rest about
-      // three heartbeat periods to elect a new leader in case if the stopped
-      // master was a leader.
-      SleepFor(MonoDelta::FromMilliseconds(
-          2 * kRaftHeartbeatIntervalMs * kMaxMissedHeartbeatPeriods +
-          3 * kRaftHeartbeatIntervalMs));
-      ASSERT_OK(cluster_->master(idx)->Resume());
-    }
+    // Induce a change of the masters' leadership.
+    ASSERT_OK(cluster_->master(former_leader_master_idx)->Pause());
+    // Make one master stop sending heartbeats, and give the rest about three
+    // heartbeat periods to elect a new leader (include an extra margin to keep
+    // the scenario stable).
+    SleepFor(MonoDelta::FromMilliseconds(
+        2 * (kRaftHeartbeatIntervalMs * kMaxMissedHeartbeatPeriods +
+             kRaftHeartbeatIntervalMs * 3)));
+    ASSERT_OK(cluster_->master(former_leader_master_idx)->Resume());
     ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
     ASSERT_NE(former_leader_master_idx, leader_master_idx);
   });

Reply via email to