Random cleanups in RaftConsensus This patch implements various random cleanups in RaftConsensus and ConsensusMetadata, including adding a DCHECK and an AssertWaitAllowed(), in addition to only acquiring the RaftConsensus lock once in the election callback.
Change-Id: Ie470a526a4a0b0595b3d4310a961876e52a99b0a Reviewed-on: http://gerrit.cloudera.org:8080/9267 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <aser...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/84e0f303 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/84e0f303 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/84e0f303 Branch: refs/heads/master Commit: 84e0f30335554a0c459bb54535ff9ab2334ec235 Parents: 8760bb0 Author: Mike Percy <mpe...@apache.org> Authored: Thu Feb 8 20:14:24 2018 -0800 Committer: Mike Percy <mpe...@apache.org> Committed: Mon Feb 12 23:29:59 2018 +0000 ---------------------------------------------------------------------- src/kudu/consensus/consensus_meta.h | 2 +- src/kudu/consensus/raft_consensus.cc | 70 +++++++++++++++---------------- 2 files changed, 35 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/84e0f303/src/kudu/consensus/consensus_meta.h ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/consensus_meta.h b/src/kudu/consensus/consensus_meta.h index 4766575..ee4f591 100644 --- a/src/kudu/consensus/consensus_meta.h +++ b/src/kudu/consensus/consensus_meta.h @@ -156,7 +156,7 @@ class ConsensusMetadata : public RefCountedThreadSafe<ConsensusMetadata> { } // The on-disk size of the consensus metadata, as of the last call to - // Load() or Flush(). + // Load() or Flush(). This method is thread-safe. int64_t on_disk_size() const { return on_disk_size_.load(std::memory_order_relaxed); } http://git-wip-us.apache.org/repos/asf/kudu/blob/84e0f303/src/kudu/consensus/raft_consensus.cc ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc index 6a0d9c1..dc59d39 100644 --- a/src/kudu/consensus/raft_consensus.cc +++ b/src/kudu/consensus/raft_consensus.cc @@ -2374,6 +2374,7 @@ RaftConfigPB RaftConsensus::CommittedConfig() const { void RaftConsensus::DumpStatusHtml(std::ostream& out) const { RaftPeerPB::Role role; { + ThreadRestrictions::AssertWaitAllowed(); LockGuard l(lock_); if (state_ != kRunning) { out << "Tablet " << EscapeForHtmlToString(tablet_id()) << " not running" << std::endl; @@ -2415,42 +2416,6 @@ void RaftConsensus::DoElectionCallback(ElectionReason reason, const ElectionResu const bool was_pre_election = result.vote_request.is_pre_election(); const char* election_type = was_pre_election ? "pre-election" : "election"; - // Snooze to avoid the election timer firing again as much as possible. - { - ThreadRestrictions::AssertWaitAllowed(); - LockGuard l(lock_); - // We need to snooze when we win and when we lose: - // - When we win because we're about to disable the timer and become leader. - // - When we lose or otherwise we can fall into a cycle, where everyone keeps - // triggering elections but no election ever completes because by the time they - // finish another one is triggered already. - SnoozeFailureDetector(string("election complete"), LeaderElectionExpBackoffDeltaUnlocked()); - - if (result.decision == VOTE_DENIED) { - failed_elections_since_stable_leader_++; - - // If we called an election and one of the voters had a higher term than we did, - // we should bump our term before we potentially try again. This is particularly - // important with pre-elections to avoid getting "stuck" in a case like: - // Peer A: has ops through 1.10, term = 2, voted in term 2 for peer C - // Peer B: has ops through 1.15, term = 1 - // In this case, Peer B will reject peer A's pre-elections for term 3 because - // the local log is longer. Peer A will reject B's pre-elections for term 2 - // because it already voted in term 2. The check below ensures that peer B - // will bump to term 2 when it gets the vote rejection, such that its - // next pre-election (for term 3) would succeed. - if (result.highest_voter_term > CurrentTermUnlocked()) { - HandleTermAdvanceUnlocked(result.highest_voter_term); - } - - LOG_WITH_PREFIX_UNLOCKED(INFO) - << "Leader " << election_type << " lost for term " << election_term - << ". Reason: " - << (!result.message.empty() ? result.message : "None given"); - return; - } - } - // The vote was granted, become leader. ThreadRestrictions::AssertWaitAllowed(); UniqueLock lock(lock_); @@ -2462,6 +2427,38 @@ void RaftConsensus::DoElectionCallback(ElectionReason reason, const ElectionResu return; } + // Snooze to avoid the election timer firing again as much as possible. + // We need to snooze when we win and when we lose: + // - When we win because we're about to disable the timer and become leader. + // - When we lose or otherwise we can fall into a cycle, where everyone keeps + // triggering elections but no election ever completes because by the time they + // finish another one is triggered already. + SnoozeFailureDetector(string("election complete"), LeaderElectionExpBackoffDeltaUnlocked()); + + if (result.decision == VOTE_DENIED) { + failed_elections_since_stable_leader_++; + + // If we called an election and one of the voters had a higher term than we did, + // we should bump our term before we potentially try again. This is particularly + // important with pre-elections to avoid getting "stuck" in a case like: + // Peer A: has ops through 1.10, term = 2, voted in term 2 for peer C + // Peer B: has ops through 1.15, term = 1 + // In this case, Peer B will reject peer A's pre-elections for term 3 because + // the local log is longer. Peer A will reject B's pre-elections for term 2 + // because it already voted in term 2. The check below ensures that peer B + // will bump to term 2 when it gets the vote rejection, such that its + // next pre-election (for term 3) would succeed. + if (result.highest_voter_term > CurrentTermUnlocked()) { + HandleTermAdvanceUnlocked(result.highest_voter_term); + } + + LOG_WITH_PREFIX_UNLOCKED(INFO) + << "Leader " << election_type << " lost for term " << election_term + << ". Reason: " + << (!result.message.empty() ? result.message : "None given"); + return; + } + // In a pre-election, we collected votes for the _next_ term. // So, we need to adjust our expectations of what the current term should be. int64_t election_started_in_term = election_term; @@ -2683,6 +2680,7 @@ void RaftConsensus::DisableFailureDetector() { } void RaftConsensus::UpdateFailureDetectorState(boost::optional<MonoDelta> delta) { + DCHECK(lock_.is_locked()); const auto& uuid = peer_uuid(); if (uuid != cmeta_->leader_uuid() && cmeta_->IsVoterInConfig(uuid, ACTIVE_CONFIG)) {