[consensus] add test for non-voters in consensus queue Added an integration test to verify that the consensus queue correctly distinguishes between voter and non-voter replica acknowledgements of the Raft consensus messages.
The test is currently disabled because it fails: currently, the consensus queue does not differentiate between Raft acknowledgement messages sent by voter and non-voter replicas. Change-Id: I16382b75256a32d695d8bc9ce020f40276114511 Reviewed-on: http://gerrit.cloudera.org:8080/8867 Tested-by: Kudu Jenkins Reviewed-by: Mike Percy <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/710b238e Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/710b238e Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/710b238e Branch: refs/heads/master Commit: 710b238e775ae5d4b59cd0664e05972fcc0a7b2d Parents: 7f4157c Author: Alexey Serbin <[email protected]> Authored: Mon Dec 18 15:56:24 2017 -0800 Committer: Mike Percy <[email protected]> Committed: Mon Jan 8 19:38:08 2018 +0000 ---------------------------------------------------------------------- .../raft_consensus_nonvoter-itest.cc | 108 +++++++++++++++++++ 1 file changed, 108 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/710b238e/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc index 9c9121c..740c559 100644 --- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc +++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc @@ -77,6 +77,7 @@ using kudu::consensus::RaftPeerPB; using kudu::consensus::IsRaftConfigMember; using kudu::consensus::IsRaftConfigVoter; using kudu::itest::AddServer; +using kudu::itest::GetConsensusState; using kudu::itest::GetInt64Metric; using kudu::itest::GetTableLocations; using kudu::itest::GetTabletLocations; @@ -1715,5 +1716,112 @@ TEST_F(RaftConsensusNonVoterITest, RestartClusterWithNonVoter) { << "replacement replica UUID: " << new_replica_uuid; } +// This test verifies that the consensus queue correctly distinguishes between +// voter and non-voter acknowledgements of the Raft messages. Essentially, the +// leader replica's consensus queue should not count an ack message from +// a non-voter replica as if it was sent by a voter replica. +// +// The test scenario is simple: try to make a configuration change in a 3 voter +// Raft cluster, adding a new non-voter replica, when a majority of voters +// is not online. Make sure the configuration change is not committed. +TEST_F(RaftConsensusNonVoterITest, DISABLED_NonVoterReplicasInConsensusQueue) { + if (!AllowSlowTests()) { + LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; + return; + } + + const MonoDelta kTimeout = MonoDelta::FromSeconds(60); + const int kOriginalReplicasNum = 3; + const int kHbIntervalMs = 50; + const vector<string> kMasterFlags = { + // Don't evict excess replicas to avoid races in the scenario. + "--catalog_manager_evict_excess_replicas=false", + }; + const vector<string> kTserverFlags = { + Substitute("--raft_heartbeat_interval_ms=$0", kHbIntervalMs), + }; + + FLAGS_num_replicas = kOriginalReplicasNum; + FLAGS_num_tablet_servers = kOriginalReplicasNum + 1; + NO_FATALS(BuildAndStart(kTserverFlags, kMasterFlags)); + ASSERT_EQ(kOriginalReplicasNum + 1, tablet_servers_.size()); + + const string& tablet_id = tablet_id_; + TabletServerMap replica_servers; + for (const auto& e : tablet_replicas_) { + if (e.first == tablet_id) { + replica_servers.emplace(e.second->uuid(), e.second); + } + } + ASSERT_EQ(FLAGS_num_replicas, replica_servers.size()); + + ASSERT_OK(WaitForServersToAgree(kTimeout, replica_servers, tablet_id, 1)); + + TServerDetails* new_replica = nullptr; + for (const auto& ts : tablet_servers_) { + if (replica_servers.find(ts.first) == replica_servers.end()) { + new_replica = ts.second; + break; + } + } + ASSERT_NE(nullptr, new_replica); + + // Disable failure detection for all replicas. + for (int i = 0; i < cluster_->num_tablet_servers(); ++i) { + ExternalTabletServer* ts = cluster_->tablet_server(i); + ASSERT_OK(cluster_->SetFlag(ts, + "enable_leader_failure_detection", "false")); + } + + // Pause all but the leader replica and try to add a new non-voter into the + // configuration. It should not pass. + TServerDetails* leader; + ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id, &leader)); + + const auto do_resume = [&] { + for (auto& e : replica_servers) { + const auto& uuid = e.first; + if (uuid == leader->uuid()) { + continue; + } + ExternalTabletServer* ts = cluster_->tablet_server_by_uuid(uuid); + ASSERT_OK(ts->Resume()); + } + }; + auto resumer = MakeScopedCleanup([&] { + do_resume(); + }); + + for (auto& e : replica_servers) { + const auto& uuid = e.first; + if (uuid == leader->uuid()) { + continue; + } + ExternalTabletServer* ts = cluster_->tablet_server_by_uuid(uuid); + ASSERT_OK(ts->Pause()); + } + + const Status s = AddServer(leader, tablet_id, new_replica, + RaftPeerPB::NON_VOTER, kTimeout); + EXPECT_FALSE(s.ok()) << s.ToString(); + + NO_FATALS(do_resume()); + resumer.cancel(); + + // Verify that the configuration hasn't changed. + consensus::ConsensusStatePB cstate; + ASSERT_EVENTUALLY([&] { + ASSERT_OK(GetConsensusState(leader, tablet_id, kTimeout, &cstate)); + ASSERT_FALSE(cstate.has_pending_config()); + }); + const auto& new_replica_uuid = new_replica->uuid(); + EXPECT_FALSE(IsRaftConfigMember(new_replica_uuid, cstate.committed_config())) + << pb_util::SecureDebugString(cstate.committed_config()) + << "new non-voter replica UUID: " << new_replica_uuid; + EXPECT_EQ(kOriginalReplicasNum, cstate.committed_config().peers_size()); + + NO_FATALS(cluster_->AssertNoCrashes()); +} + } // namespace tserver } // namespace kudu
