This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b984851f704808f5ee07182811527e811389aa57 Author: Alexey Serbin <[email protected]> AuthorDate: Fri Feb 14 11:24:17 2020 -0800 [test] fix flake in TsTabletManagerITest::TestTableStats Sometimes the leader replica of a tablet can change its role prior to receiving StepDown request, so the test outputs somelike like: I0214 18:13:07.095886 5028 raft_consensus.cc:572] T a32ee1063340424db8a7e7ea6aedcfa1 P ddfa34be67fd4a40b26218185403580c [term 3 FOLLOWER]: Rejecting request to step down while not leader src/kudu/integration-tests/ts_tablet_manager-itest.cc:818: Failure Failed Bad status: Illegal state: Code NOT_THE_LEADER: Not currently leader To avoid test flakiness it makes sense to continue with next tablet when observing such an error. To verify the fix, I ran the scenario in TSAN configuration with and without this patch: before: 16 NOT_THE_LEADER failures in 256 runs after: 0 NOT_THE_LEADER failures in 256 runs Note, however: even after one source of flakiness is gone, this scenario is still very flaky. In addition, I revised other tests where LeaderStepDown() might fail due to fluctuating leadership and updated them accordingly. Change-Id: Ibbd6652cdcb30f8899767dfb932cc402cf739115 Reviewed-on: http://gerrit.cloudera.org:8080/15224 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <[email protected]> --- src/kudu/integration-tests/raft_consensus-itest.cc | 65 ++++++++++++---------- .../raft_consensus_election-itest.cc | 8 +++ .../raft_consensus_nonvoter-itest.cc | 19 +++++-- .../tombstoned_voting-stress-test.cc | 3 + .../integration-tests/ts_tablet_manager-itest.cc | 14 ++++- 5 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc index dfe2940..ef14ce6 100644 --- a/src/kudu/integration-tests/raft_consensus-itest.cc +++ b/src/kudu/integration-tests/raft_consensus-itest.cc @@ -461,6 +461,7 @@ void RaftConsensusITest::Write128KOpsToLeader(int num_writes) { void RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites( const TabletServerMap& tablet_servers, const string& leader_uuid) { + static const auto kTimeout = MonoDelta::FromSeconds(20); TServerDetails* initial_leader = FindOrDie(tablet_servers, leader_uuid); @@ -497,12 +498,13 @@ void RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites( MonoDelta::FromMilliseconds(100)); ASSERT_TRUE(s.IsTimedOut()) << s.ToString(); - // Step down. - ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, MonoDelta::FromSeconds(10))); + // Step down. Scenarios which use this method turn off leader failure + // detection, so the leader role cannot fluctuate among tablet replicas. + ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, kTimeout)); // Assert that elections time out without a live majority. // We specify a very short timeout here to keep the tests fast. - ASSERT_OK(StartElection(initial_leader, tablet_id_, MonoDelta::FromSeconds(10))); + ASSERT_OK(StartElection(initial_leader, tablet_id_, kTimeout)); s = WaitUntilLeader(initial_leader, tablet_id_, MonoDelta::FromMilliseconds(100)); ASSERT_TRUE(s.IsTimedOut()) << s.ToString(); LOG(INFO) << "Expected timeout encountered on election with weakened config: " << s.ToString(); @@ -515,17 +517,17 @@ void RaftConsensusITest::AssertMajorityRequiredForElectionsAndWrites( } } - ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(20), tablet_servers, tablet_id_, 1)); + ASSERT_OK(WaitForServersToAgree(kTimeout, tablet_servers, tablet_id_, 1)); // Now an election should succeed. - ASSERT_OK(StartElection(initial_leader, tablet_id_, MonoDelta::FromSeconds(10))); - ASSERT_OK(WaitUntilLeader(initial_leader, tablet_id_, MonoDelta::FromSeconds(10))); + ASSERT_OK(StartElection(initial_leader, tablet_id_, kTimeout)); + ASSERT_OK(WaitUntilLeader(initial_leader, tablet_id_, kTimeout)); LOG(INFO) << "Successful election with full config of size " << config_size; // And a write should also succeed. ASSERT_OK(WriteSimpleTestRow(initial_leader, tablet_id_, RowOperationsPB::UPDATE, kTestRowKey, kTestRowIntVal, Substitute("qsz=$0", config_size), - MonoDelta::FromSeconds(10))); + kTimeout)); } void RaftConsensusITest::CreateClusterForCrashyNodesTests(vector<string> extra_ts_flags) { @@ -3094,7 +3096,7 @@ TEST_F(RaftConsensusITest, TestLeaderTransferWhenFollowerFallsBehindLeaderGC) { LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; return; } - + const auto kTimeout = MonoDelta::FromSeconds(30); vector<string> ts_flags = { // Disable follower eviction. "--evict_failed_followers=false", @@ -3117,38 +3119,45 @@ TEST_F(RaftConsensusITest, TestLeaderTransferWhenFollowerFallsBehindLeaderGC) { TabletServerMap active_tablet_servers = tablet_servers_; ASSERT_EQ(3, active_tablet_servers.size()); ASSERT_EQ(1, active_tablet_servers.erase(follower_uuid)); - ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(30), active_tablet_servers, - tablet_id_, 1)); + ASSERT_OK(WaitForServersToAgree(kTimeout, active_tablet_servers, tablet_id_, 1)); // Try to transfer leadership to the follower that has fallen behind log GC. - auto* leader_ts = FindOrDie(tablet_servers_, leader_uuid); - ConsensusServiceProxy* c_proxy = CHECK_NOTNULL(leader_ts->consensus_proxy.get()); - LeaderStepDownRequestPB req; - LeaderStepDownResponsePB resp; - RpcController rpc; - - req.set_dest_uuid(leader_uuid); - req.set_tablet_id(tablet_id_); - req.set_mode(consensus::LeaderStepDownMode::GRACEFUL); - req.set_new_leader_uuid(follower_uuid); - - // The request should succeed. - ASSERT_OK(c_proxy->LeaderStepDown(req, &resp, &rpc)); - ASSERT_FALSE(resp.has_error()) << SecureDebugString(resp); + // The leader role might fluctuate among tablet replicas: LeaderStepDown() + // request will be retried in such rare cases because of ASSERT_EVENTUALLY(). + ASSERT_EVENTUALLY([&] { + TServerDetails* leader_ts; + ASSERT_OK(FindTabletLeader( + tablet_servers_, tablet_id_, kTimeout, &leader_ts)); + LeaderStepDownRequestPB req; + req.set_dest_uuid(leader_ts->uuid()); + req.set_tablet_id(tablet_id_); + req.set_mode(consensus::LeaderStepDownMode::GRACEFUL); + req.set_new_leader_uuid(follower_uuid); + + // The request should succeed. + ConsensusServiceProxy* c_proxy = CHECK_NOTNULL(leader_ts->consensus_proxy.get()); + RpcController ctl; + LeaderStepDownResponsePB resp; + ASSERT_OK(c_proxy->LeaderStepDown(req, &resp, &ctl)); + ASSERT_FALSE(resp.has_error()) << SecureDebugString(resp); + }); // However, the leader will not be able to transfer leadership to the lagging // follower, and eventually will resume normal operation. We check this by // waiting for a write to succeed. ASSERT_EVENTUALLY([&] { WriteRequestPB w_req; - WriteResponsePB w_resp; - rpc.Reset(); - rpc.set_timeout(MonoDelta::FromSeconds(30)); w_req.set_tablet_id(tablet_id_); ASSERT_OK(SchemaToPB(schema_, w_req.mutable_schema())); AddTestRowToPB(RowOperationsPB::INSERT, schema_, kTestRowKey, kTestRowIntVal, "hello world", w_req.mutable_row_operations()); - ASSERT_OK(leader_ts->tserver_proxy->Write(w_req, &w_resp, &rpc)); + TServerDetails* leader_ts; + ASSERT_OK(FindTabletLeader( + tablet_servers_, tablet_id_, kTimeout, &leader_ts)); + RpcController ctl; + ctl.set_timeout(kTimeout); + WriteResponsePB w_resp; + ASSERT_OK(leader_ts->tserver_proxy->Write(w_req, &w_resp, &ctl)); }); } diff --git a/src/kudu/integration-tests/raft_consensus_election-itest.cc b/src/kudu/integration-tests/raft_consensus_election-itest.cc index 322e05f..287f8f6 100644 --- a/src/kudu/integration-tests/raft_consensus_election-itest.cc +++ b/src/kudu/integration-tests/raft_consensus_election-itest.cc @@ -325,6 +325,8 @@ TEST_F(RaftConsensusElectionITest, LeaderStepDown) { consensus::EXCLUDE_HEALTH_REPORT, &cstate_before)); // Step down and test that a 2nd stepdown returns the expected result. + // The leader role cannot fluctuate among tablet replicas because of + // --enable_leader_failure_detection=false setting. ASSERT_OK(LeaderStepDown(ts, tablet_id_, kTimeout)); // Get the Raft term from the leader that has just stepped down. @@ -413,6 +415,8 @@ TEST_P(RaftConsensusNumLeadersMetricTest, TestNumLeadersMetric) { // Renounce leadership on the rest. for (; idx < tablet_ids.size(); idx++) { + // The leader role cannot fluctuate among tablet replicas because of + // --enable_leader_failure_detection=false setting. ASSERT_OK(LeaderStepDown(ts, tablet_ids[idx], kTimeout)); ASSERT_OK(get_num_leaders_metric(&num_leaders_metric)); ASSERT_EQ(--num_leaders_expected, num_leaders_metric); @@ -459,6 +463,8 @@ TEST_F(RaftConsensusElectionITest, StepDownWithSlowFollower) { SleepFor(MonoDelta::FromSeconds(1)); // Step down should respond quickly despite the hung requests. + // The leader role cannot fluctuate among tablet replicas because of + // --enable_leader_failure_detection=false setting. ASSERT_OK(LeaderStepDown(tservers[0], tablet_id_, MonoDelta::FromSeconds(3))); } @@ -549,6 +555,8 @@ TEST_F(RaftConsensusElectionITest, ElectPendingVoter) { // Now that TS 4 is electable (and pending), have TS 0 step down. LOG(INFO) << "Forcing Peer " << initial_leader->uuid() << " to step down..."; + // The leader role cannot fluctuate among tablet replicas because of + // --enable_leader_failure_detection=false setting. ASSERT_OK(LeaderStepDown(initial_leader, tablet_id_, MonoDelta::FromSeconds(10))); // Resume TS 1 so we have a majority of 3 to elect a new leader. diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc index c4d9c86..6bf794e 100644 --- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc +++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc @@ -50,6 +50,7 @@ #include "kudu/mini-cluster/external_mini_cluster.h" #include "kudu/tablet/metadata.pb.h" #include "kudu/tserver/tablet_server-test-base.h" +#include "kudu/tserver/tserver.pb.h" #include "kudu/util/metrics.h" #include "kudu/util/monotime.h" #include "kudu/util/net/sockaddr.h" @@ -536,12 +537,14 @@ TEST_F(RaftConsensusNonVoterITest, AddNonVoterReplica) { WAIT_FOR_LEADER, ANY_REPLICA, &has_leader, &tablet_locations)); ASSERT_TRUE(has_leader); - // Check the update cluster is able to elect a leader. - { + // Check the updated cluster is able to elect a leader. + // The leader role can fluctuate among tablet replicas, so the block below + // is wrapped into ASSERT_EVENTUALLY. + ASSERT_EVENTUALLY([&]() { TServerDetails* leader = nullptr; ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader)); ASSERT_OK(LeaderStepDown(leader, tablet_id, kTimeout)); - } + }); // Make sure it's possible to insert more data into the table once it's backed // by one more (NON_VOTER) replica. @@ -1090,7 +1093,15 @@ TEST_F(RaftConsensusNonVoterITest, PromotedReplicaCanVote) { if (leader->uuid() != new_replica_uuid) { break; } - ASSERT_OK(LeaderStepDown(leader, tablet_id, kTimeout)); + // In rare cases, the leader replica can change its role right before the + // step-down request is received. + TabletServerErrorPB error; + auto s = LeaderStepDown(leader, tablet_id, kTimeout, &error); + if (s.IsIllegalState() && + error.code() == TabletServerErrorPB::NOT_THE_LEADER) { + continue; + } + ASSERT_OK(s); } // Pause the newly added replica and the leader. diff --git a/src/kudu/integration-tests/tombstoned_voting-stress-test.cc b/src/kudu/integration-tests/tombstoned_voting-stress-test.cc index e300630..43de038 100644 --- a/src/kudu/integration-tests/tombstoned_voting-stress-test.cc +++ b/src/kudu/integration-tests/tombstoned_voting-stress-test.cc @@ -23,6 +23,7 @@ #include <string> #include <thread> #include <unordered_map> +#include <utility> #include <vector> #include <gflags/gflags.h> @@ -266,6 +267,8 @@ TEST_F(TombstonedVotingStressTest, TestTombstonedVotingUnderStress) { // We don't shut this node down because it will serve as the tablet copy // "source" during the test. LOG(INFO) << "forcing leader to step down..."; + // The leader role cannot fluctuate in this scenario because of + // --enable_leader_failure_detection=false setting. ASSERT_OK(itest::LeaderStepDown(ts0_ets, tablet_id_, kTimeout)); // Now we are done with setup. Start the "stress" part of the test. diff --git a/src/kudu/integration-tests/ts_tablet_manager-itest.cc b/src/kudu/integration-tests/ts_tablet_manager-itest.cc index f7b4ac6..d77ad1b 100644 --- a/src/kudu/integration-tests/ts_tablet_manager-itest.cc +++ b/src/kudu/integration-tests/ts_tablet_manager-itest.cc @@ -65,6 +65,7 @@ #include "kudu/tserver/tablet_server.h" #include "kudu/tserver/tablet_server_options.h" #include "kudu/tserver/ts_tablet_manager.h" +#include "kudu/tserver/tserver.pb.h" #include "kudu/util/countdown_latch.h" #include "kudu/util/jsonwriter.h" #include "kudu/util/metrics.h" @@ -815,7 +816,18 @@ TEST_F(TsTabletManagerITest, TestTableStats) { // Step down. itest::TServerDetails* tserver = CHECK_NOTNULL(FindOrDie(ts_map, replica->permanent_uuid())); - ASSERT_OK(LeaderStepDown(tserver, replica->tablet_id(), MonoDelta::FromSeconds(10))); + TabletServerErrorPB error; + auto s = LeaderStepDown(tserver, replica->tablet_id(), + MonoDelta::FromSeconds(10), &error); + // In rare cases, the leader replica can change its role right before + // the step-down request is received. + if (s.IsIllegalState() && + error.code() == TabletServerErrorPB::NOT_THE_LEADER) { + LOG(INFO) << Substitute("T:$0 P:$1: not a leader replica anymore", + replica->tablet_id(), replica->permanent_uuid()); + continue; + } + ASSERT_OK(s); SleepFor(MonoDelta::FromMilliseconds(kMaxElectionTime)); // Check stats after every election. NO_FATALS(CheckStats(kRowsCount));
