This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 14912a1fd78ba7cf4d62bf934ae64d6f6f229ee6 Author: Alexey Serbin <[email protected]> AuthorDate: Thu Apr 9 15:25:08 2020 -0700 [catalog_manager] reduce contention in ScopedLeaderSharedLock While troubleshooting one performance issue when running a big cluster with a large number of tables and a high rate of ConnectToMaster requests, in the logs I noticed many reports like the following: 0323 03:59:31.091198 (+607857us) spinlock_profiling.cc:243] Waited 492 ms on lock 0x4cb0960. stack: 0000000002398852 0000000000ad8c69 0000000000aa62ba 000000000221aaa8 ... which translates into (anonymous namespace)::SubmitSpinLockProfileData() master::CatalogManager::ScopedLeaderSharedLock::ScopedLeaderSharedLock() master::MasterServiceImpl::ConnectToMaster() rpc::GeneratedServiceIf::Handle() ... From the code it became apparent that the lock in question was std::lock_guard<simple_spinlock> l(catalog_->state_lock_); in the ScopedLeaderSharedLock() constructor. As far as I can see, there is no need to access the master's Raft consensus information (which itself might wait on its internal locks if there is corresponding Raft-consensus activity) under the catalog's state lock. This patch shortens the critical section with the catalog's state lock held when constructing a CatalogManager::ScopedLeaderSharedLock instance. Change-Id: I3b2e6866a8a0d5bda9e2b1f01e0668427de60868 Reviewed-on: http://gerrit.cloudera.org:8080/15698 Reviewed-by: Andrew Wong <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/master/catalog_manager.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index af1b5f1..598c5f6 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -5316,12 +5316,16 @@ CatalogManager::ScopedLeaderSharedLock::ScopedLeaderSharedLock( initial_term_(-1) { // Check if the catalog manager is running. 
- std::lock_guard<simple_spinlock> l(catalog_->state_lock_); - if (PREDICT_FALSE(catalog_->state_ != kRunning)) { - catalog_status_ = Status::ServiceUnavailable( - Substitute("Catalog manager is not initialized. State: $0", - StateToString(catalog_->state_))); - return; + int64_t leader_ready_term; + { + std::lock_guard<simple_spinlock> l(catalog_->state_lock_); + if (PREDICT_FALSE(catalog_->state_ != kRunning)) { + catalog_status_ = Status::ServiceUnavailable( + Substitute("Catalog manager is not initialized. State: $0", + StateToString(catalog_->state_))); + return; + } + leader_ready_term = catalog_->leader_ready_term_; } ConsensusStatePB cstate; @@ -5343,7 +5347,7 @@ CatalogManager::ScopedLeaderSharedLock::ScopedLeaderSharedLock( uuid, SecureShortDebugString(cstate))); return; } - if (PREDICT_FALSE(catalog_->leader_ready_term_ != cstate.current_term() || + if (PREDICT_FALSE(leader_ready_term != initial_term_ || !leader_shared_lock_.owns_lock())) { leader_status_ = Status::ServiceUnavailable( "Leader not yet ready to serve requests");
