IMPALA-4409: respect lock order in QueryExecState::CancelInternal() The code previously violated the (partially documented) lock order in ImpalaServer. An example of a possible cycle in the dependency graph is:
* SetQueryInFlight() holds SessionState::lock_ and waits for 'query_expiration_lock_' * ExpireQueries() holds 'query_expiration_lock_' and waits for 'query_exec_state_map_lock_' * GetQueryExecState() holds 'query_exec_state_map_lock_' and waits for QueryExecState::lock_ * QES::Cancel() holds QueryExecState::lock_ and waits for SessionState::lock It's not clear how likely the above scenario is, but it's hard to rule it out. We have not seen this hang in the wild but have seen similar ones. Testing: Ran local stress test on 3-node minicluster with TPC-H 20 and 50% of queries being cancelled. Change-Id: I785fea0163a90d0633fb6ed77ec7c6882ab5c110 Reviewed-on: http://gerrit.cloudera.org:8080/4896 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9755ea63 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9755ea63 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9755ea63 Branch: refs/heads/master Commit: 9755ea63b50d23f323c84edb4307ce603ded4fe1 Parents: 1d8cdb0 Author: Tim Armstrong <[email protected]> Authored: Mon Oct 31 16:40:53 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Thu Nov 3 20:10:02 2016 +0000 ---------------------------------------------------------------------- be/src/runtime/coordinator.h | 2 ++ be/src/service/impala-server.h | 54 +++++++++++++++++++++++++-------- be/src/service/query-exec-state.cc | 19 +++++++----- be/src/service/query-exec-state.h | 23 +++++++------- 4 files changed, 66 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9755ea63/be/src/runtime/coordinator.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h index 53f00eb..fab6b29 100644 --- a/be/src/runtime/coordinator.h +++ b/be/src/runtime/coordinator.h @@ -199,6 +199,7 @@ class Coordinator { return exec_summary_; } + /// See the ImpalaServer class comment for the required lock acquisition order. SpinLock& GetExecSummaryLock() const { return exec_summary_lock_; } /// Receive a local filter update from a fragment instance. Aggregate that filter update @@ -351,6 +352,7 @@ class Coordinator { boost::scoped_ptr<ObjectPool> obj_pool_; /// Execution summary for this query. + /// See the ImpalaServer class comment for the required lock acquisition order. mutable SpinLock exec_summary_lock_; TExecSummary exec_summary_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9755ea63/be/src/service/impala-server.h ---------------------------------------------------------------------- diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index 7d0f7c9..c40fcb4 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -72,9 +72,34 @@ class TGetExecSummaryReq; /// An ImpalaServer contains both frontend and backend functionality; /// it implements ImpalaService (Beeswax), ImpalaHiveServer2Service (HiveServer2) /// and ImpalaInternalService APIs. -/// This class is partially thread-safe. To ensure freedom from deadlock, -/// locks on the maps are obtained before locks on the items contained in the maps. -// +/// +/// Locking +/// ------- +/// This class is partially thread-safe. To ensure freedom from deadlock, if multiple +/// locks are acquired, lower-numbered locks must be acquired before higher-numbered +/// locks: +/// 1. connection_to_sessions_map_lock_ +/// 2. session_state_map_lock_ +/// 3. SessionState::lock +/// 4. query_expiration_lock_ +/// 5. query_exec_state_map_lock_ +/// 6. QueryExecState::fetch_rows_lock +/// 7. QueryExecState::lock +/// 8. QueryExecState::expiration_data_lock_ +/// 9. Coordinator::exec_summary_lock +/// +/// Coordinator::lock_ should not be acquired at the same time as the +/// ImpalaServer/SessionState/QueryExecState locks. Aside from +/// Coordinator::exec_summary_lock_ the Coordinator's lock ordering is independent of +/// the above lock ordering. +/// +/// The following locks are not held in conjunction with other locks: +/// * query_log_lock_ +/// * session_timeout_lock_ +/// * query_locations_lock_ +/// * uuid_lock_ +/// * catalog_version_lock_ +/// /// TODO: The state of a running query is currently not cleaned up if the /// query doesn't experience any errors at runtime and close() doesn't get called. /// The solution is to have a separate thread that cleans up orphaned @@ -638,7 +663,10 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, typedef boost::unordered_map<TUniqueId, std::shared_ptr<QueryExecState>> QueryExecStateMap; QueryExecStateMap query_exec_state_map_; - boost::mutex query_exec_state_map_lock_; // protects query_exec_state_map_ + + /// Protects query_exec_state_map_. See "Locking" in the class comment for lock + /// acquisition order. + boost::mutex query_exec_state_map_lock_; /// Default query options in the form of TQueryOptions and beeswax::ConfigVariable TQueryOptions default_query_options_; @@ -671,8 +699,8 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, /// Client network address. TNetworkAddress network_address; - /// Protects all fields below. - /// If this lock has to be taken with query_exec_state_map_lock, take this lock first. + /// Protects all fields below. See "Locking" in the class comment for lock + /// acquisition order. boost::mutex lock; /// If true, the session has been closed. @@ -765,16 +793,16 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, /// For access to GetSessionState() / MarkSessionInactive() friend class ScopedSessionState; - /// Protects session_state_map_. Should be taken before any query exec-state locks, - /// including query_exec_state_map_lock_. Should be taken before individual - /// session-state locks. + /// Protects session_state_map_. See "Locking" in the class comment for lock + /// acquisition order. boost::mutex session_state_map_lock_; /// A map from session identifier to a structure containing per-session information typedef boost::unordered_map<TUniqueId, std::shared_ptr<SessionState>> SessionStateMap; SessionStateMap session_state_map_; - /// Protects connection_to_sessions_map_. May be taken before session_state_map_lock_. + /// Protects connection_to_sessions_map_. See "Locking" in the class comment for lock + /// acquisition order. boost::mutex connection_to_sessions_map_lock_; /// Map from a connection ID to the associated list of sessions so that all can be @@ -801,8 +829,7 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, session->last_accessed_ms = UnixMillis(); } - /// protects query_locations_. Must always be taken after - /// query_exec_state_map_lock_ if both are required. + /// Protects query_locations_. Not held in conjunction with other locks. boost::mutex query_locations_lock_; /// A map from backend to the list of queries currently running there. @@ -865,7 +892,8 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, ProxyUserMap; ProxyUserMap authorized_proxy_user_config_; - /// Guards queries_by_timestamp_. Must not be acquired before a session state lock. + /// Guards queries_by_timestamp_. See "Locking" in the class comment for lock + /// acquisition order. boost::mutex query_expiration_lock_; /// Describes a query expiration event (t, q) where t is the expiration deadline in http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9755ea63/be/src/service/query-exec-state.cc ---------------------------------------------------------------------- diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc index 5cd4c0c..f7a1254 100644 --- a/be/src/service/query-exec-state.cc +++ b/be/src/service/query-exec-state.cc @@ -822,17 +822,20 @@ Status ImpalaServer::QueryExecState::FetchRowsInternal(const int32_t max_rows, } Status ImpalaServer::QueryExecState::Cancel(bool check_inflight, const Status* cause) { + if (check_inflight) { + // If the query is in 'inflight_queries' it means that the query has actually started + // executing. It is ok if the query is removed from 'inflight_queries' during + // cancellation, so we can release the session lock before starting the cancellation + // work. + lock_guard<mutex> session_lock(session_->lock); + if (session_->inflight_queries.find(query_id()) == session_->inflight_queries.end()) { + return Status("Query not yet running"); + } + } + Coordinator* coord; { lock_guard<mutex> lock(lock_); - if (check_inflight) { - lock_guard<mutex> session_lock(session_->lock); - if (session_->inflight_queries.find(query_id()) == - session_->inflight_queries.end()) { - return Status("Query not yet running"); - } - } - // If the query is completed or cancelled, no need to update state. bool already_done = eos_ || query_state_ == QueryState::EXCEPTION; if (!already_done && cause != NULL) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9755ea63/be/src/service/query-exec-state.h ---------------------------------------------------------------------- diff --git a/be/src/service/query-exec-state.h b/be/src/service/query-exec-state.h index 54ee929..78c859c 100644 --- a/be/src/service/query-exec-state.h +++ b/be/src/service/query-exec-state.h @@ -49,9 +49,9 @@ class QueryExecStateCleaner; /// we can return to the client. It also captures all state required for /// servicing query-related requests from the client. /// Thread safety: this class is generally not thread-safe, callers need to -/// synchronize access explicitly via lock(). -/// To avoid deadlocks, the caller must *not* acquire query_exec_state_map_lock_ -/// while holding the exec state's lock. +/// synchronize access explicitly via lock(). See the ImpalaServer class comment for +/// the required lock acquisition order. +/// /// TODO: Consider renaming to RequestExecState for consistency. /// TODO: Compute stats is the only stmt that requires child queries. Once the /// CatalogService performs background stats gathering the concept of child queries @@ -207,11 +207,11 @@ class ImpalaServer::QueryExecState { /// responsible for acquiring this lock. To avoid deadlocks, callers must not hold lock_ /// while acquiring this lock (since FetchRows() will release and re-acquire lock_ during /// its execution). + /// See "Locking" in the class comment for lock acquisition order. boost::mutex fetch_rows_lock_; - /// Protects last_active_time_ and ref_count_. - /// Must always be taken as the last lock, that is no other locks may be taken while - /// holding this lock. + /// Protects last_active_time_ and ref_count_. Only held during short function calls - + /// no other locks should be acquired while holding this lock. mutable boost::mutex expiration_data_lock_; int64_t last_active_time_; @@ -223,11 +223,12 @@ class ImpalaServer::QueryExecState { /// Executor for any child queries (e.g. compute stats subqueries). Always non-NULL. const boost::scoped_ptr<ChildQueryExecutor> child_query_executor_; - // Protects all following fields. Acquirers should be careful not to hold it for too - // long, e.g. during RPCs because this lock is required to make progress on various - // ImpalaServer requests. If held for too long it can block progress of client - // requests for this query, e.g. query status and cancellation. Furthermore, until - // IMPALA-3882 is fixed, it can indirectly block progress on all other queries. + /// Protects all following fields. Acquirers should be careful not to hold it for too + /// long, e.g. during RPCs because this lock is required to make progress on various + /// ImpalaServer requests. If held for too long it can block progress of client + /// requests for this query, e.g. query status and cancellation. Furthermore, until + /// IMPALA-3882 is fixed, it can indirectly block progress on all other queries. + /// See "Locking" in the class comment for lock acquisition order. boost::mutex lock_; ExecEnv* exec_env_;
