This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b984851f704808f5ee07182811527e811389aa57
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Feb 14 11:24:17 2020 -0800

    [test] fix flake in TsTabletManagerITest::TestTableStats
    
    Sometimes the leader replica of a tablet can change its role prior to
    receiving StepDown request, so the test outputs somelike like:
    
      I0214 18:13:07.095886  5028 raft_consensus.cc:572] T 
a32ee1063340424db8a7e7ea6aedcfa1 P ddfa34be67fd4a40b26218185403580c [term 3 
FOLLOWER]: Rejecting request to step down while not leader
      src/kudu/integration-tests/ts_tablet_manager-itest.cc:818: Failure
      Failed
      Bad status: Illegal state: Code NOT_THE_LEADER: Not currently leader
    
    To avoid test flakiness it makes sense to continue with next tablet
    when observing such an error.
    
    To verify the fix, I ran the scenario in TSAN configuration with and
    without this patch:
      before: 16 NOT_THE_LEADER failures in 256 runs
      after:   0 NOT_THE_LEADER failures in 256 runs
    Note, however: even after one source of flakiness is gone, this scenario
    is still very flaky.
    
    In addition, I revised other tests where LeaderStepDown() might fail
    due to fluctuating leadership and updated them accordingly.
    
    Change-Id: Ibbd6652cdcb30f8899767dfb932cc402cf739115
    Reviewed-on: http://gerrit.cloudera.org:8080/15224
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/integration-tests/raft_consensus-itest.cc | 65 ++++++++++++----------
 .../raft_consensus_election-itest.cc               |  8 +++
 .../raft_consensus_nonvoter-itest.cc               | 19 +++++--
 .../tombstoned_voting-stress-test.cc               |  3 +
 .../integration-tests/ts_tablet_manager-itest.cc   | 14 ++++-
 5 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc 
b/src/kudu/integration-tests/raft_consensus-itest.cc
index dfe2940..ef14ce6 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -461,6 +461,7 @@ void RaftConsensusITest::Write128KOpsToLeader(int 
num_writes) {
 
 void RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites(
     const TabletServerMap& tablet_servers, const string& leader_uuid) {
+  static const auto kTimeout = MonoDelta::FromSeconds(20);
 
   TServerDetails* initial_leader = FindOrDie(tablet_servers, leader_uuid);
 
@@ -497,12 +498,13 @@ void 
RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites(
                                   MonoDelta::FromMilliseconds(100));
     ASSERT_TRUE(s.IsTimedOut()) << s.ToString();
 
-    // Step down.
-    ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, 
MonoDelta::FromSeconds(10)));
+    // Step down. Scenarios which use this method turn off leader failure
+    // detection, so the leader role cannot fluctuate among tablet replicas.
+    ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, kTimeout));
 
     // Assert that elections time out without a live majority.
     // We specify a very short timeout here to keep the tests fast.
-    ASSERT_OK(StartElection(initial_leader, tablet_id_, 
MonoDelta::FromSeconds(10)));
+    ASSERT_OK(StartElection(initial_leader, tablet_id_, kTimeout));
     s = WaitUntilLeader(initial_leader, tablet_id_, 
MonoDelta::FromMilliseconds(100));
     ASSERT_TRUE(s.IsTimedOut()) << s.ToString();
     LOG(INFO) << "Expected timeout encountered on election with weakened 
config: " << s.ToString();
@@ -515,17 +517,17 @@ void 
RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites(
     }
   }
 
-  ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(20), tablet_servers, 
tablet_id_, 1));
+  ASSERT_OK(WaitForServersToAgree(kTimeout, tablet_servers, tablet_id_, 1));
 
   // Now an election should succeed.
-  ASSERT_OK(StartElection(initial_leader, tablet_id_, 
MonoDelta::FromSeconds(10)));
-  ASSERT_OK(WaitUntilLeader(initial_leader, tablet_id_, 
MonoDelta::FromSeconds(10)));
+  ASSERT_OK(StartElection(initial_leader, tablet_id_, kTimeout));
+  ASSERT_OK(WaitUntilLeader(initial_leader, tablet_id_, kTimeout));
   LOG(INFO) << "Successful election with full config of size " << config_size;
 
   // And a write should also succeed.
   ASSERT_OK(WriteSimpleTestRow(initial_leader, tablet_id_, 
RowOperationsPB::UPDATE,
                                kTestRowKey, kTestRowIntVal, 
Substitute("qsz=$0", config_size),
-                               MonoDelta::FromSeconds(10)));
+                               kTimeout));
 }
 
 void RaftConsensusITest::CreateClusterForCrashyNodesTests(vector<string> 
extra_ts_flags) {
@@ -3094,7 +3096,7 @@ TEST_F(RaftConsensusITest, 
TestLeaderTransferWhenFollowerFallsBehindLeaderGC) {
     LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
     return;
   }
-
+  const auto kTimeout = MonoDelta::FromSeconds(30);
   vector<string> ts_flags = {
     // Disable follower eviction.
     "--evict_failed_followers=false",
@@ -3117,38 +3119,45 @@ TEST_F(RaftConsensusITest, 
TestLeaderTransferWhenFollowerFallsBehindLeaderGC) {
   TabletServerMap active_tablet_servers = tablet_servers_;
   ASSERT_EQ(3, active_tablet_servers.size());
   ASSERT_EQ(1, active_tablet_servers.erase(follower_uuid));
-  ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(30), 
active_tablet_servers,
-                                  tablet_id_, 1));
+  ASSERT_OK(WaitForServersToAgree(kTimeout, active_tablet_servers, tablet_id_, 
1));
 
   // Try to transfer leadership to the follower that has fallen behind log GC.
-  auto* leader_ts = FindOrDie(tablet_servers_, leader_uuid);
-  ConsensusServiceProxy* c_proxy = 
CHECK_NOTNULL(leader_ts->consensus_proxy.get());
-  LeaderStepDownRequestPB req;
-  LeaderStepDownResponsePB resp;
-  RpcController rpc;
-
-  req.set_dest_uuid(leader_uuid);
-  req.set_tablet_id(tablet_id_);
-  req.set_mode(consensus::LeaderStepDownMode::GRACEFUL);
-  req.set_new_leader_uuid(follower_uuid);
-
-  // The request should succeed.
-  ASSERT_OK(c_proxy->LeaderStepDown(req, &resp, &rpc));
-  ASSERT_FALSE(resp.has_error()) << SecureDebugString(resp);
+  // The leader role might fluctuate among tablet replicas: LeaderStepDown()
+  // request will be retried in such rare cases because of ASSERT_EVENTUALLY().
+  ASSERT_EVENTUALLY([&] {
+    TServerDetails* leader_ts;
+    ASSERT_OK(FindTabletLeader(
+        tablet_servers_, tablet_id_, kTimeout, &leader_ts));
+    LeaderStepDownRequestPB req;
+    req.set_dest_uuid(leader_ts->uuid());
+    req.set_tablet_id(tablet_id_);
+    req.set_mode(consensus::LeaderStepDownMode::GRACEFUL);
+    req.set_new_leader_uuid(follower_uuid);
+
+    // The request should succeed.
+    ConsensusServiceProxy* c_proxy = 
CHECK_NOTNULL(leader_ts->consensus_proxy.get());
+    RpcController ctl;
+    LeaderStepDownResponsePB resp;
+    ASSERT_OK(c_proxy->LeaderStepDown(req, &resp, &ctl));
+    ASSERT_FALSE(resp.has_error()) << SecureDebugString(resp);
+  });
 
   // However, the leader will not be able to transfer leadership to the lagging
   // follower, and eventually will resume normal operation. We check this by
   // waiting for a write to succeed.
   ASSERT_EVENTUALLY([&] {
     WriteRequestPB w_req;
-    WriteResponsePB w_resp;
-    rpc.Reset();
-    rpc.set_timeout(MonoDelta::FromSeconds(30));
     w_req.set_tablet_id(tablet_id_);
     ASSERT_OK(SchemaToPB(schema_, w_req.mutable_schema()));
     AddTestRowToPB(RowOperationsPB::INSERT, schema_, kTestRowKey, 
kTestRowIntVal,
                    "hello world", w_req.mutable_row_operations());
-    ASSERT_OK(leader_ts->tserver_proxy->Write(w_req, &w_resp, &rpc));
+    TServerDetails* leader_ts;
+    ASSERT_OK(FindTabletLeader(
+        tablet_servers_, tablet_id_, kTimeout, &leader_ts));
+    RpcController ctl;
+    ctl.set_timeout(kTimeout);
+    WriteResponsePB w_resp;
+    ASSERT_OK(leader_ts->tserver_proxy->Write(w_req, &w_resp, &ctl));
   });
 }
 
diff --git a/src/kudu/integration-tests/raft_consensus_election-itest.cc 
b/src/kudu/integration-tests/raft_consensus_election-itest.cc
index 322e05f..287f8f6 100644
--- a/src/kudu/integration-tests/raft_consensus_election-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_election-itest.cc
@@ -325,6 +325,8 @@ TEST_F(RaftConsensusElectionITest, LeaderStepDown) {
                               consensus::EXCLUDE_HEALTH_REPORT, 
&cstate_before));
 
   // Step down and test that a 2nd stepdown returns the expected result.
+  // The leader role cannot fluctuate among tablet replicas because of
+  // --enable_leader_failure_detection=false setting.
   ASSERT_OK(LeaderStepDown(ts, tablet_id_, kTimeout));
 
   // Get the Raft term from the leader that has just stepped down.
@@ -413,6 +415,8 @@ TEST_P(RaftConsensusNumLeadersMetricTest, 
TestNumLeadersMetric) {
 
   // Renounce leadership on the rest.
   for (; idx < tablet_ids.size(); idx++) {
+    // The leader role cannot fluctuate among tablet replicas because of
+    // --enable_leader_failure_detection=false setting.
     ASSERT_OK(LeaderStepDown(ts, tablet_ids[idx], kTimeout));
     ASSERT_OK(get_num_leaders_metric(&num_leaders_metric));
     ASSERT_EQ(--num_leaders_expected, num_leaders_metric);
@@ -459,6 +463,8 @@ TEST_F(RaftConsensusElectionITest, 
StepDownWithSlowFollower) {
   SleepFor(MonoDelta::FromSeconds(1));
 
   // Step down should respond quickly despite the hung requests.
+  // The leader role cannot fluctuate among tablet replicas because of
+  // --enable_leader_failure_detection=false setting.
   ASSERT_OK(LeaderStepDown(tservers[0], tablet_id_, 
MonoDelta::FromSeconds(3)));
 }
 
@@ -549,6 +555,8 @@ TEST_F(RaftConsensusElectionITest, ElectPendingVoter) {
 
   // Now that TS 4 is electable (and pending), have TS 0 step down.
   LOG(INFO) << "Forcing Peer " << initial_leader->uuid() << " to step down...";
+  // The leader role cannot fluctuate among tablet replicas because of
+  // --enable_leader_failure_detection=false setting.
   ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, 
MonoDelta::FromSeconds(10)));
 
   // Resume TS 1 so we have a majority of 3 to elect a new leader.
diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc 
b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
index c4d9c86..6bf794e 100644
--- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
@@ -50,6 +50,7 @@
 #include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/tablet/metadata.pb.h"
 #include "kudu/tserver/tablet_server-test-base.h"
+#include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/sockaddr.h"
@@ -536,12 +537,14 @@ TEST_F(RaftConsensusNonVoterITest, AddNonVoterReplica) {
       WAIT_FOR_LEADER, ANY_REPLICA, &has_leader, &tablet_locations));
   ASSERT_TRUE(has_leader);
 
-  // Check the update cluster is able to elect a leader.
-  {
+  // Check the updated cluster is able to elect a leader.
+  // The leader role can fluctuate among tablet replicas, so the block below
+  // is wrapped into ASSERT_EVENTUALLY.
+  ASSERT_EVENTUALLY([&]() {
     TServerDetails* leader = nullptr;
     ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader));
     ASSERT_OK(LeaderStepDown(leader, tablet_id, kTimeout));
-  }
+  });
 
   // Make sure it's possible to insert more data into the table once it's 
backed
   // by one more (NON_VOTER) replica.
@@ -1090,7 +1093,15 @@ TEST_F(RaftConsensusNonVoterITest, 
PromotedReplicaCanVote) {
       if (leader->uuid() != new_replica_uuid) {
         break;
       }
-      ASSERT_OK(LeaderStepDown(leader, tablet_id, kTimeout));
+      // In rare cases, the leader replica can change its role right before the
+      // step-down request is received.
+      TabletServerErrorPB error;
+      auto s = LeaderStepDown(leader, tablet_id, kTimeout, &error);
+      if (s.IsIllegalState() &&
+          error.code() == TabletServerErrorPB::NOT_THE_LEADER) {
+        continue;
+      }
+      ASSERT_OK(s);
     }
 
     // Pause the newly added replica and the leader.
diff --git a/src/kudu/integration-tests/tombstoned_voting-stress-test.cc 
b/src/kudu/integration-tests/tombstoned_voting-stress-test.cc
index e300630..43de038 100644
--- a/src/kudu/integration-tests/tombstoned_voting-stress-test.cc
+++ b/src/kudu/integration-tests/tombstoned_voting-stress-test.cc
@@ -23,6 +23,7 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <gflags/gflags.h>
@@ -266,6 +267,8 @@ TEST_F(TombstonedVotingStressTest, 
TestTombstonedVotingUnderStress) {
   // We don't shut this node down because it will serve as the tablet copy
   // "source" during the test.
   LOG(INFO) << "forcing leader to step down...";
+  // The leader role cannot fluctuate in this scenario because of
+  // --enable_leader_failure_detection=false setting.
   ASSERT_OK(itest::LeaderStepDown(ts0_ets, tablet_id_, kTimeout));
 
   // Now we are done with setup. Start the "stress" part of the test.
diff --git a/src/kudu/integration-tests/ts_tablet_manager-itest.cc 
b/src/kudu/integration-tests/ts_tablet_manager-itest.cc
index f7b4ac6..d77ad1b 100644
--- a/src/kudu/integration-tests/ts_tablet_manager-itest.cc
+++ b/src/kudu/integration-tests/ts_tablet_manager-itest.cc
@@ -65,6 +65,7 @@
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/tserver/tablet_server_options.h"
 #include "kudu/tserver/ts_tablet_manager.h"
+#include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/jsonwriter.h"
 #include "kudu/util/metrics.h"
@@ -815,7 +816,18 @@ TEST_F(TsTabletManagerITest, TestTableStats) {
         // Step down.
         itest::TServerDetails* tserver =
             CHECK_NOTNULL(FindOrDie(ts_map, replica->permanent_uuid()));
-        ASSERT_OK(LeaderStepDown(tserver, replica->tablet_id(), 
MonoDelta::FromSeconds(10)));
+        TabletServerErrorPB error;
+        auto s = LeaderStepDown(tserver, replica->tablet_id(),
+                                MonoDelta::FromSeconds(10), &error);
+        // In rare cases, the leader replica can change its role right before
+        // the step-down request is received.
+        if (s.IsIllegalState() &&
+            error.code() == TabletServerErrorPB::NOT_THE_LEADER) {
+          LOG(INFO) << Substitute("T:$0 P:$1: not a leader replica anymore",
+                                  replica->tablet_id(), 
replica->permanent_uuid());
+          continue;
+        }
+        ASSERT_OK(s);
         SleepFor(MonoDelta::FromMilliseconds(kMaxElectionTime));
         // Check stats after every election.
         NO_FATALS(CheckStats(kRowsCount));

Reply via email to