Repository: incubator-impala Updated Branches: refs/heads/master 9755ea63b -> 40bce4176
IMPALA-4411: Kudu inserts violate lock ordering and could deadlock This fixes a potential hang because the code took QueryExecState::lock_ before SessionState::lock, which is the wrong order. A hang has not yet been observed. The code that was taking the session lock doesn't actually need to be holding the QueryExecState lock_, so this is easy to fix by moving the relevant code. Testing: Running an exhaustive test job, and stress tests manually. Change-Id: I2aa36fce78525a80a6b880e1b668105006bc1425 Reviewed-on: http://gerrit.cloudera.org:8080/4895 Reviewed-by: Matthew Jacobs <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7862e809 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7862e809 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7862e809 Branch: refs/heads/master Commit: 7862e809f608bd34cd0a318d0a91c70a6d95923d Parents: 9755ea6 Author: Matthew Jacobs <[email protected]> Authored: Mon Oct 31 16:48:43 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Thu Nov 3 21:28:35 2016 +0000 ---------------------------------------------------------------------- be/src/service/query-exec-state.cc | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7862e809/be/src/service/query-exec-state.cc ---------------------------------------------------------------------- diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc index f7a1254..bfb5a86 100644 --- a/be/src/service/query-exec-state.cc +++ b/be/src/service/query-exec-state.cc @@ -524,22 +524,29 @@ void ImpalaServer::QueryExecState::Done() { // Make sure we join on wait_thread_ before we finish (and especially before this object // is destroyed). BlockOnWait(); - unique_lock<mutex> l(lock_); - end_time_ = TimestampValue::LocalTime(); - summary_profile_.AddInfoString("End Time", end_time().DebugString()); - summary_profile_.AddInfoString("Query State", PrintQueryState(query_state_)); - query_events_->MarkEvent("Unregister query"); + // Update latest observed Kudu timestamp stored in the session from the coordinator. + // Needs to take the session_ lock which must not be taken while holding lock_, so this + // must happen before taking lock_ below. if (coord_.get() != NULL) { - // Update latest observed Kudu timestamp stored in the session. + // This is safe to access on coord_ after Wait() has been called. uint64_t latest_kudu_ts = coord_->GetLatestKuduInsertTimestamp(); if (latest_kudu_ts > 0) { - VLOG_RPC << "Updating session latest observed Kudu timestamp: " << latest_kudu_ts; + VLOG_RPC << "Updating session (id=" << session_id() << ") with latest " + << "observed Kudu timestamp: " << latest_kudu_ts; lock_guard<mutex> session_lock(session_->lock); session_->kudu_latest_observed_ts = std::max<uint64_t>( session_->kudu_latest_observed_ts, latest_kudu_ts); } + } + unique_lock<mutex> l(lock_); + end_time_ = TimestampValue::LocalTime(); + summary_profile_.AddInfoString("End Time", end_time().DebugString()); + summary_profile_.AddInfoString("Query State", PrintQueryState(query_state_)); + query_events_->MarkEvent("Unregister query"); + + if (coord_.get() != NULL) { // Release any reserved resources. Status status = exec_env_->scheduler()->Release(schedule_.get()); if (!status.ok()) {
