This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b2e4d952b6134a47db383fa2a0a88ac47cbaecc1 Author: Alexey Serbin <[email protected]> AuthorDate: Thu Apr 9 15:03:25 2020 -0700 [catalog manager] cache masters' addresses While troubleshooting one performance issue if running a big cluster with large number of tables and high rate of ConnectToMaster requests, in the logs I noticed many reports like the following: 0323 03:59:31.091574 (+114857us) spinlock_profiling.cc:243] Waited 114 ms on lock 0x4d0ee8c. stack: 0000000002398852 00000000020d45b3 0000000000a8f8fc 0000000000aa6300 000000000221aaa8 ... which translates into (anonymous namespace)::SubmitSpinLockProfileData() consensus::RaftConsensus::CommittedConfig() master::Master::GetMasterHostPorts() master::MasterServiceImpl::ConnectToMaster() rpc::GeneratedServiceIf::Handle() ... From the code it became apparent that the lock in question was LockGuard l(lock_); in RaftConsensus::CommittedConfig() accessor method. This patch introduces caching of the master host ports, so there is no need to fetch the information on the master addresses from the consensus metadata every time ConnectToMaster() is called. Since the membership for master Raft consensus is static, it's enough to fetch the information on the masters' addresses upon catalog initialization and use that information since then. If we implement dynamic master change config, we'll need to refresh the cached state on change config. This is a follow-up to 14912a1fd78ba7cf4d62bf934ae64d6f6f229ee6. Change-Id: Ic9afea2d708bd3060b3d9f5b672660e1d2dca910 Reviewed-on: http://gerrit.cloudera.org:8080/15704 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Reviewed-by: Bankim Bhavsar <[email protected]> Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/master/catalog_manager.cc | 8 +++----- src/kudu/master/catalog_manager.h | 10 ++++++++++ src/kudu/master/master.cc | 10 +++++----- src/kudu/master/master.h | 4 ++-- src/kudu/master/master_service.cc | 12 ++++-------- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 598c5f6..0f98119 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -825,13 +825,11 @@ Status CatalogManager::Init(bool is_first_run) { auto_rebalancer_ = std::move(task); } + RETURN_NOT_OK(master_->GetMasterHostPorts(&master_addresses_)); if (hms::HmsCatalog::IsEnabled()) { - vector<HostPortPB> master_addrs_pb; - RETURN_NOT_OK(master_->GetMasterHostPorts(&master_addrs_pb)); - string master_addresses = JoinMapped( - master_addrs_pb, - [] (const HostPortPB& hostport) { + master_addresses_, + [] (const HostPort& hostport) { return Substitute("$0:$1", hostport.host(), hostport.port()); }, ","); diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h index d6d5ff6..6650a76 100644 --- a/src/kudu/master/catalog_manager.h +++ b/src/kudu/master/catalog_manager.h @@ -48,6 +48,7 @@ #include "kudu/util/cow_object.h" #include "kudu/util/locks.h" #include "kudu/util/monotime.h" +#include "kudu/util/net/net_util.h" #include "kudu/util/oid_generator.h" #include "kudu/util/random.h" #include "kudu/util/rw_mutex.h" @@ -783,6 +784,10 @@ class CatalogManager : public tserver::TabletReplicaLookupIf { // name is returned. static std::string NormalizeTableName(const std::string& table_name); + const std::vector<HostPort>& master_addresses() const { + return master_addresses_; + } + private: // These tests call ElectedAsLeaderCb() directly. FRIEND_TEST(MasterTest, TestShutdownDuringTableVisit); @@ -1158,6 +1163,11 @@ class CatalogManager : public tserver::TabletReplicaLookupIf { // Always acquire this lock before state_lock_. RWMutex leader_lock_; + // Cached information on master addresses. It's populated in Init() since the + // membership of masters' Raft consensus is static (i.e. no new members are + // added or any existing removed). + std::vector<HostPort> master_addresses_; + // Async operations are accessing some private methods // (TODO: this stuff should be deferred and done in the background thread) friend class AsyncAlterTable; diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc index e64b179..e8f81c7 100644 --- a/src/kudu/master/master.cc +++ b/src/kudu/master/master.cc @@ -28,7 +28,6 @@ #include <glog/logging.h> #include "kudu/cfile/block_cache.h" -#include "kudu/common/common.pb.h" #include "kudu/common/wire_protocol.h" #include "kudu/common/wire_protocol.pb.h" #include "kudu/consensus/metadata.pb.h" @@ -367,7 +366,7 @@ Status Master::ListMasters(vector<ServerEntryPB>* masters) const { return Status::OK(); } -Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const { +Status Master::GetMasterHostPorts(vector<HostPort>* hostports) const { auto consensus = catalog_manager_->master_consensus(); if (!consensus) { return Status::IllegalState("consensus not running"); @@ -375,7 +374,7 @@ Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const { hostports->clear(); consensus::RaftConfigPB config = consensus->CommittedConfig(); - for (auto& peer : *config.mutable_peers()) { + for (const auto& peer : *config.mutable_peers()) { if (peer.member_type() == consensus::RaftPeerPB::VOTER) { // In non-distributed master configurations, we don't store our own // last known address in the Raft config. So, we'll fill it in from @@ -383,9 +382,10 @@ Status Master::GetMasterHostPorts(vector<HostPortPB>* hostports) const { if (!peer.has_last_known_addr()) { DCHECK_EQ(config.peers_size(), 1); DCHECK(registration_initialized_.load()); - hostports->emplace_back(registration_.rpc_addresses(0)); + DCHECK_GT(registration_.rpc_addresses_size(), 0); + hostports->emplace_back(HostPortFromPB(registration_.rpc_addresses(0))); } else { - hostports->emplace_back(std::move(*peer.mutable_last_known_addr())); + hostports->emplace_back(HostPortFromPB(peer.last_known_addr())); } } } diff --git a/src/kudu/master/master.h b/src/kudu/master/master.h index 3f77074..28670fe 100644 --- a/src/kudu/master/master.h +++ b/src/kudu/master/master.h @@ -32,7 +32,7 @@ namespace kudu { -class HostPortPB; +class HostPort; class MaintenanceManager; class MonoDelta; class ThreadPool; @@ -105,7 +105,7 @@ class Master : public kserver::KuduServer { // Gets the HostPorts for all of the masters in the cluster. // This is not as complete as ListMasters() above, but operates just // based on local state. - Status GetMasterHostPorts(std::vector<HostPortPB>* hostports) const; + Status GetMasterHostPorts(std::vector<HostPort>* hostports) const; // Crash the master on disk error. static void CrashMasterOnDiskError(const std::string& uuid); diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc index 50ed51d..b67cd02 100644 --- a/src/kudu/master/master_service.cc +++ b/src/kudu/master/master_service.cc @@ -629,14 +629,10 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB* /*req*/, // Set the info about the other masters, so that the client can verify // it has the full set of info. - { - vector<HostPortPB> hostports; - WARN_NOT_OK(server_->GetMasterHostPorts(&hostports), - "unable to get HostPorts for masters"); - resp->mutable_master_addrs()->Reserve(hostports.size()); - for (auto& hp : hostports) { - *resp->add_master_addrs() = std::move(hp); - } + const auto& addresses = server_->catalog_manager()->master_addresses(); + resp->mutable_master_addrs()->Reserve(addresses.size()); + for (const auto& hp : addresses) { + *resp->add_master_addrs() = HostPortToPB(hp); } const bool is_leader = l.leader_status().ok();
