This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new e338c3118 IMPALA-12208: Avoid registering RPCs with ClientRequestState after Finalization e338c3118 is described below commit e338c311849ca305092e1ceefec8de2490706e39 Author: Kurt Deschler <kdesc...@cloudera.com> AuthorDate: Mon Jun 12 22:50:33 2023 -0500 IMPALA-12208: Avoid registering RPCs with ClientRequestState after Finalization This patch fixes a timing hole where concurrent RPCs could register with ClientRequestState after Finalize() had already been called, resulting in DCHECK(pending_rpcs_.empty()) failure in debug builds since the RPCs were not unregistered before the ClientRequestState was destroyed. This DCHECK ensures that timing info for registered RPCs is reaped wherever possible. Prod builds did not exhibit any negative symptoms. Testing: -Ran tests/hs2/test_hs2.py::TestHS2::test_concurrent_unregister in a loop for several minutes. Previously this would fail within seconds. Change-Id: I66415e5695c2ed65518efee2d9dbaaec224910e9 Reviewed-on: http://gerrit.cloudera.org:8080/20065 Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com> Reviewed-by: Csaba Ringhofer <csringho...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/service/client-request-state.cc | 9 ++++++--- be/src/service/client-request-state.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc index 7d57d8745..7bb4338ef 100644 --- a/be/src/service/client-request-state.cc +++ b/be/src/service/client-request-state.cc @@ -184,8 +184,9 @@ ClientRequestState::ClientRequestState(const TQueryCtx& query_ctx, Frontend* fro ClientRequestState::~ClientRequestState() { DCHECK(wait_thread_.get() == NULL) << "BlockOnWait() needs to be called!"; - DCHECK(pending_rpcs_.empty()); - UnRegisterRemainingRPCs(); + DCHECK(!track_rpcs_); // Should get set to false in Finalize() + DCHECK(pending_rpcs_.empty()); // Should get cleared in Finalize() + UnRegisterRemainingRPCs(); // Avoid memory leaks if Finalize() didn't get called } Status ClientRequestState::SetResultCache(QueryResultSet* cache, @@ -1088,6 +1089,7 @@ Status ClientRequestState::Finalize(bool check_inflight, const Status* cause) { // Update the timeline here so that all of the above work is captured in the timeline. query_events_->MarkEvent("Unregister query"); + UnRegisterRemainingRPCs(); return Status::OK(); } @@ -2178,7 +2180,7 @@ void ClientRequestState::RegisterRPC() { // The existence of rpc_context means that this is called from an RPC if (rpc_context) { lock_guard<mutex> l(lock_); - if(pending_rpcs_.find(rpc_context) == pending_rpcs_.end()) { + if (track_rpcs_ && pending_rpcs_.find(rpc_context) == pending_rpcs_.end()) { rpc_context->Register(); pending_rpcs_.insert(rpc_context); rpc_count_->Add(1); @@ -2206,6 +2208,7 @@ void ClientRequestState::UnRegisterRemainingRPCs() { for (auto rpc_context: pending_rpcs_) { rpc_context->UnRegister(); } + track_rpcs_ = false; pending_rpcs_.clear(); } diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h index 2f112dc08..4477d648c 100644 --- a/be/src/service/client-request-state.h +++ b/be/src/service/client-request-state.h @@ -629,6 +629,9 @@ class ClientRequestState { RuntimeProfile::Counter* rpc_count_; // Contexts for RPC calls that have not completed and had their stats collected std::set<RpcEventHandler::InvocationContext*> pending_rpcs_; + // Enables tracking of RPCs in pending_rpcs_. Also used to turn off tracking once + // Finalize() has been called. + bool track_rpcs_ = true; RuntimeProfile::EventSequence* query_events_;