This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 717349b4e9ee7666b43a491bc5d07d55ec5de003 Author: Todd Lipcon <[email protected]> AuthorDate: Tue Feb 26 12:25:35 2019 -0800 KUDU-2711 (part 2): use a RWMutex for TSDescriptor One reason that GetTableLocations can end up slow is contention on the TSDescriptor object. This patch changes the spinlock to an rw_spinlock. The benchmark improves throughput by about 77%: table_locations-itest --gtest_filter=\*Bench\* \ --benchmark_num_tablets 300 --benchmark_num_threads 30 \ --benchmark_runtime_secs=10 before fix: Count: 10555 Mean: 8567.29 Percentiles: 0% (min) = 68 25% = 7712 50% (med) = 8320 75% = 9152 95% = 10880 99% = 12224 99.9% = 17024 99.99% = 27776 100% (max) = 30384 with rwlock: Count: 18758 Mean: 4024.09 Percentiles: 0% (min) = 136 25% = 3472 50% (med) = 4032 75% = 4512 95% = 5248 99% = 6048 99.9% = 7712 99.99% = 13440 100% (max) = 13976 Change-Id: Id48635e49f11d20fa1802b17a3ff5771632dfd01 Reviewed-on: http://gerrit.cloudera.org:8080/12614 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <[email protected]> Reviewed-by: Todd Lipcon <[email protected]> Reviewed-by: Alexey Serbin <[email protected]> --- src/kudu/master/ts_descriptor.cc | 31 ++++++++++++++++--------------- src/kudu/master/ts_descriptor.h | 8 ++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc index 208adac..40586eb 100644 --- a/src/kudu/master/ts_descriptor.cc +++ b/src/kudu/master/ts_descriptor.cc @@ -160,7 +160,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance, LocationCache* location_cache) { // Do basic registration work under the lock. { - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); RETURN_NOT_OK(RegisterUnlocked(instance, registration)); } @@ -181,7 +181,7 @@ Status TSDescriptor::Register(const NodeInstancePB& instance, // Assign the location under the lock if location resolution succeeds. If // it fails, log the error. if (s.ok()) { - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); location_.emplace(std::move(location)); } else { KLOG_EVERY_N_SECS(ERROR, 60) << Substitute( @@ -195,13 +195,13 @@ Status TSDescriptor::Register(const NodeInstancePB& instance, } void TSDescriptor::UpdateHeartbeatTime() { - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); last_heartbeat_ = MonoTime::Now(); } MonoDelta TSDescriptor::TimeSinceHeartbeat() const { MonoTime now(MonoTime::Now()); - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); return now - last_heartbeat_; } @@ -210,7 +210,7 @@ bool TSDescriptor::PresumedDead() const { } int64_t TSDescriptor::latest_seqno() const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); return latest_seqno_; } @@ -232,25 +232,26 @@ void TSDescriptor::DecayRecentReplicaCreationsUnlocked() { } void TSDescriptor::IncrementRecentReplicaCreations() { - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); DecayRecentReplicaCreationsUnlocked(); recent_replica_creations_ += 1; } double TSDescriptor::RecentReplicaCreations() { - std::lock_guard<simple_spinlock> l(lock_); + // NOTE: not a shared lock because of the "Decay" side effect. + std::lock_guard<rw_spinlock> l(lock_); DecayRecentReplicaCreationsUnlocked(); return recent_replica_creations_; } void TSDescriptor::GetRegistration(ServerRegistrationPB* reg) const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); CHECK(registration_) << "No registration"; CHECK_NOTNULL(reg)->CopyFrom(*registration_); } void TSDescriptor::GetNodeInstancePB(NodeInstancePB* instance_pb) const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); instance_pb->set_permanent_uuid(permanent_uuid_); instance_pb->set_instance_seqno(latest_seqno_); } @@ -258,7 +259,7 @@ void TSDescriptor::GetNodeInstancePB(NodeInstancePB* instance_pb) const { Status TSDescriptor::ResolveSockaddr(Sockaddr* addr, string* host) const { vector<HostPort> hostports; { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); for (const HostPortPB& addr : registration_->rpc_addresses()) { hostports.emplace_back(addr.host(), addr.port()); } @@ -293,7 +294,7 @@ Status TSDescriptor::ResolveSockaddr(Sockaddr* addr, string* host) const { Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger, shared_ptr<tserver::TabletServerAdminServiceProxy>* proxy) { { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); if (ts_admin_proxy_) { *proxy = ts_admin_proxy_; return Status::OK(); @@ -304,7 +305,7 @@ Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger string host; RETURN_NOT_OK(ResolveSockaddr(&addr, &host)); - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); if (!ts_admin_proxy_) { ts_admin_proxy_.reset(new tserver::TabletServerAdminServiceProxy( messenger, addr, std::move(host))); @@ -316,7 +317,7 @@ Status TSDescriptor::GetTSAdminProxy(const shared_ptr<rpc::Messenger>& messenger Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messenger, shared_ptr<consensus::ConsensusServiceProxy>* proxy) { { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); if (consensus_proxy_) { *proxy = consensus_proxy_; return Status::OK(); @@ -327,7 +328,7 @@ Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messeng string host; RETURN_NOT_OK(ResolveSockaddr(&addr, &host)); - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); if (!consensus_proxy_) { consensus_proxy_.reset(new consensus::ConsensusServiceProxy( messenger, addr, std::move(host))); @@ -337,7 +338,7 @@ Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messeng } string TSDescriptor::ToString() const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); const auto& addr = registration_->rpc_addresses(0); return Substitute("$0 ($1:$2)", permanent_uuid_, addr.host(), addr.port()); } diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h index 76a5fa8..48cc994 100644 --- a/src/kudu/master/ts_descriptor.h +++ b/src/kudu/master/ts_descriptor.h @@ -114,13 +114,13 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> { // Set the number of live replicas (i.e. running or bootstrapping). void set_num_live_replicas(int n) { DCHECK_GE(n, 0); - std::lock_guard<simple_spinlock> l(lock_); + std::lock_guard<rw_spinlock> l(lock_); num_live_replicas_ = n; } // Return the number of live replicas (i.e running or bootstrapping). int num_live_replicas() const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); return num_live_replicas_; } @@ -128,7 +128,7 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> { // since the location could change at any time if the tablet server // re-registers. boost::optional<std::string> location() const { - std::lock_guard<simple_spinlock> l(lock_); + shared_lock<rw_spinlock> l(lock_); return location_; } @@ -153,7 +153,7 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> { void DecayRecentReplicaCreationsUnlocked(); - mutable simple_spinlock lock_; + mutable rw_spinlock lock_; const std::string permanent_uuid_; int64_t latest_seqno_;
