This is an automated email from the ASF dual-hosted git repository. laiyingchun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 0299a254b3e832c94786b155c562cd5d5b46fcc3 Author: kedeng <[email protected]> AuthorDate: Fri Jun 7 17:14:56 2024 +0800 [metrics] Add metrics for tablet replica election Add tablet-level statistics to track the time consumed for replica leader election. For any tablet that initiates a leader election operation, the time will be recorded regardless of the outcome. The addition of monitoring items will aid in historical issue tracking and analysis, as well as facilitate the configuration of monitoring alarms. Change-Id: I2d501b514e90461998bbc64f77f2bb12805ebfaa Reviewed-on: http://gerrit.cloudera.org:8080/21490 Reviewed-by: Yingchun Lai <[email protected]> Reviewed-by: Zoltan Chovan <[email protected]> Tested-by: Yingchun Lai <[email protected]> --- src/kudu/consensus/leader_election.cc | 11 ++++--- src/kudu/consensus/leader_election.h | 10 ++++++- src/kudu/consensus/raft_consensus.cc | 17 +++++++++++ src/kudu/consensus/raft_consensus.h | 1 + src/kudu/integration-tests/raft_consensus-itest.cc | 35 ++++++++++++++++++++++ 5 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/kudu/consensus/leader_election.cc b/src/kudu/consensus/leader_election.cc index 4c8191870..d10d74524 100644 --- a/src/kudu/consensus/leader_election.cc +++ b/src/kudu/consensus/leader_election.cc @@ -160,11 +160,13 @@ string VoteCounter::GetElectionSummary() const { ElectionResult::ElectionResult(VoteRequestPB request, ElectionVote election_decision, ConsensusTerm highest_term, - string msg) + string msg, + MonoTime op_start_time) : vote_request(std::move(request)), decision(election_decision), highest_voter_term(highest_term), - message(std::move(msg)) { + message(std::move(msg)), + start_time(op_start_time) { DCHECK(!message.empty()); } @@ -207,6 +209,7 @@ LeaderElection::~LeaderElection() { void LeaderElection::Run() { VLOG_WITH_PREFIX(1) << "Running leader election."; + start_time_ = MonoTime::Now(); // Initialize voter state tracking. vector<string> other_voter_uuids; @@ -306,7 +309,7 @@ void LeaderElection::CheckForDecision() { string msg = election_won ? "achieved majority votes" : "could not achieve majority"; result_.reset(new ElectionResult( - request_, decision, highest_voter_term_, std::move(msg))); + request_, decision, highest_voter_term_, std::move(msg), start_time_)); } // Check whether to respond. This can happen as a result of either getting // a majority vote or of something invalidating the election, like @@ -400,7 +403,7 @@ void LeaderElection::HandleHigherTermUnlocked(const VoterState& state) { if (!result_) { LOG_WITH_PREFIX(INFO) << "Cancelling election due to peer responding with higher term"; result_.reset(new ElectionResult( - request_, VOTE_DENIED, state.response.responder_term(), std::move(msg))); + request_, VOTE_DENIED, state.response.responder_term(), std::move(msg), start_time_)); } } diff --git a/src/kudu/consensus/leader_election.h b/src/kudu/consensus/leader_election.h index c7fe8f837..81e20359d 100644 --- a/src/kudu/consensus/leader_election.h +++ b/src/kudu/consensus/leader_election.h @@ -98,7 +98,8 @@ class VoteCounter { struct ElectionResult { public: ElectionResult(VoteRequestPB request, ElectionVote election_decision, - ConsensusTerm highest_term, std::string msg); + ConsensusTerm highest_term, std::string msg, + MonoTime op_start_time); // The vote request that was sent to the voters for this election. const VoteRequestPB vote_request; @@ -111,6 +112,10 @@ struct ElectionResult { // Human-readable explanation of the vote result, if any. const std::string message; + + // The time at which the election began. + // Used to record the election duration. + const MonoTime start_time; }; // Driver class to run a leader election. @@ -239,6 +244,9 @@ class LeaderElection : public RefCountedThreadSafe<LeaderElection> { // The highest term seen from a voter so far (or 0 if no votes). int64_t highest_voter_term_; + + // The election's start time + MonoTime start_time_; }; } // namespace consensus diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc index 9d227e8b3..24d7d06da 100644 --- a/src/kudu/consensus/raft_consensus.cc +++ b/src/kudu/consensus/raft_consensus.cc @@ -162,6 +162,13 @@ METRIC_DEFINE_gauge_int64(tablet, time_since_last_leader_heartbeat, "in milliseconds. This metric is identically zero on a leader replica.", kudu::MetricLevel::kDebug); +METRIC_DEFINE_histogram(tablet, election_duration, + "Tablet Election Duration", + kudu::MetricUnit::kMilliseconds, + "Duration of tablet leader election as initiator. From the start of " + "the leader election to its completion, regardless of the outcome.", + kudu::MetricLevel::kDebug, + 3600000LU, 1); using google::protobuf::util::MessageDifferencer; using kudu::pb_util::SecureShortDebugString; @@ -257,6 +264,8 @@ Status RaftConsensus::Start(const ConsensusBootstrapInfo& info, MergeType::kMax) ->AutoDetach(&metric_detacher_); + election_duration_metric_ = METRIC_election_duration.Instantiate(metric_entity); + // A single Raft thread pool token is shared between RaftConsensus and // PeerManager. Because PeerManager is owned by RaftConsensus, it receives a // raw pointer to the token, to emphasize that RaftConsensus is responsible @@ -2665,6 +2674,8 @@ void RaftConsensus::ElectionCallback(ElectionReason reason, const ElectionResult auto self = shared_from_this(); Status s = raft_pool_token_->Submit([=]() { self->DoElectionCallback(reason, result); }); if (!s.ok()) { + election_duration_metric_->Increment( + (MonoTime::Now() - result.start_time).ToMilliseconds()); static const char* const msg = "unable to run election callback"; CHECK(s.IsServiceUnavailable()) << LogPrefixThreadSafe() << msg; WARN_NOT_OK(s, LogPrefixThreadSafe() + msg); @@ -2679,6 +2690,12 @@ void RaftConsensus::DoElectionCallback(ElectionReason reason, const ElectionResu ThreadRestrictions::AssertWaitAllowed(); UniqueLock l(lock_); + // Record the duration of the election regardless of the outcome. + auto update_metrics = MakeScopedCleanup([&]() { + election_duration_metric_->Increment( + (MonoTime::Now() - result.start_time).ToMilliseconds()); + }); + Status s = CheckRunningUnlocked(); if (PREDICT_FALSE(!s.ok())) { LOG_WITH_PREFIX_UNLOCKED(INFO) << "Received " << election_type << " callback for term " diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h index dda15d1f5..6f26faf50 100644 --- a/src/kudu/consensus/raft_consensus.h +++ b/src/kudu/consensus/raft_consensus.h @@ -947,6 +947,7 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>, scoped_refptr<Counter> follower_memory_pressure_rejections_; scoped_refptr<AtomicGauge<int64_t>> term_metric_; scoped_refptr<AtomicGauge<int64_t>> num_failed_elections_metric_; + scoped_refptr<Histogram> election_duration_metric_; // NOTE: it's important that this is the first member to be destructed. This // ensures we do not attempt to collect metrics while calling the destructor. diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc index 9d5c1526f..1d5c95185 100644 --- a/src/kudu/integration-tests/raft_consensus-itest.cc +++ b/src/kudu/integration-tests/raft_consensus-itest.cc @@ -99,6 +99,7 @@ DECLARE_bool(encrypt_data_at_rest); METRIC_DECLARE_entity(server); METRIC_DECLARE_entity(tablet); METRIC_DECLARE_counter(transaction_memory_pressure_rejections); +METRIC_DECLARE_histogram(election_duration); METRIC_DECLARE_gauge_int64(time_since_last_leader_heartbeat); METRIC_DECLARE_gauge_int64(failed_elections_since_stable_leader); METRIC_DECLARE_gauge_uint64(hybrid_clock_timestamp); @@ -2783,6 +2784,40 @@ int64_t GetTimeSinceLastLeaderHeartbeat(const ExternalTabletServer* ets, return ret; } +int64_t GetElectionDurationTotalCount(const ExternalTabletServer* ets, + const std::string& tablet_id) { + int64_t ret; + CHECK_OK(GetInt64Metric( + ets->bound_http_hostport(), + &METRIC_ENTITY_tablet, + tablet_id.c_str(), + &METRIC_election_duration, + "total_count", + &ret)); + return ret; +} + +TEST_F(RaftConsensusITest, TestElectionDurationMetrics) { + constexpr auto kNumReplicas = 3; + constexpr auto kNumTservers = 3; + const vector<string> kTsFlags = { + // Make leader elections faster so we can test easier. + "--raft_heartbeat_interval_ms=100", + }; + const auto kTimeout = MonoDelta::FromSeconds(30); + + FLAGS_num_replicas = kNumReplicas; + FLAGS_num_tablet_servers = kNumTservers; + NO_FATALS(BuildAndStart(kTsFlags)); + ASSERT_OK(WaitForServersToAgree(kTimeout, tablet_servers_, tablet_id_, 1)); + TServerDetails* leader = nullptr; + ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &leader)); + const auto& leader_uuid = leader->uuid(); + + auto* leader_srv = cluster_->tablet_server_by_uuid(leader_uuid); + ASSERT_LT(0, GetElectionDurationTotalCount(leader_srv, tablet_id_)); +} + // Test for election-related metrics with the leader failure detection disabled. // This avoids inadvertent leader elections that might race with the tests // of the metric's behavior.
