This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit fd9de7a2e16e72f244b9bfa15a71efe16a5c01ac Author: Riza Suminto <riza.sumi...@cloudera.com> AuthorDate: Sat Aug 23 17:10:32 2025 -0700 IMPALA-14348: Fix TSAN issue after IMPALA-14327 IMPALA-14327 reveal TSAN issue in impala-hs2-server.cc. ClientRequestState::returns_result_set() was accessed without holding ClientRequestState::lock_ in impala-hs2-server.cc. This patch fix the issue by obtaining ClientRequestState::lock_ first before accessing ClientRequestState::returns_result_set() and ClientRequestState::result_metadata(). Both accesses are done inside ImpalaServer::SetupResultsCacheing so lock_ only need to be obtained once. Filed IMPALA-14359 to follow up investigation. Testing: Run and pass TSAN build. Change-Id: I41fc25cea5b4ef7b4b9daac54b8665fa76ceb1cd Reviewed-on: http://gerrit.cloudera.org:8080/23343 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/runtime/query-driver.cc | 5 +++-- be/src/service/client-request-state.h | 2 +- be/src/service/impala-hs2-server.cc | 33 ++++++++++++++++++++++++--------- be/src/service/impala-server.h | 4 ++-- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/be/src/runtime/query-driver.cc b/be/src/runtime/query-driver.cc index 6fb584cc5..05ec95571 100644 --- a/be/src/runtime/query-driver.cc +++ b/be/src/runtime/query-driver.cc @@ -366,8 +366,9 @@ void QueryDriver::RetryQueryFromThread( if (request_state->IsResultCacheingEnabled()) { status = DebugAction(FLAGS_debug_actions, "QUERY_RETRY_SET_RESULT_CACHE"); if (status.ok()) { - status = parent_server_->SetupResultsCacheing( - retry_query_handle, session, request_state->result_cache_max_size()); + bool returns_result_set; + status = parent_server_->SetupResultsCacheing(retry_query_handle, session, + request_state->result_cache_max_size(), &returns_result_set); } if (!status.ok()) { string error_msg = Substitute( diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h index eb65da95d..7ffdb374f 100644 --- a/be/src/service/client-request-state.h +++ b/be/src/service/client-request-state.h @@ -285,7 +285,7 @@ class ClientRequestState { int num_rows_fetched() const { return num_rows_fetched_; } void set_fetched_rows() { fetched_rows_ = true; } bool fetched_rows() const { return fetched_rows_; } - bool returns_result_set() { return !result_metadata_.columns.empty(); } + bool returns_result_set() const { return !result_metadata_.columns.empty(); } const TResultSetMetadata* result_metadata() const { return &result_metadata_; } const TUniqueId& query_id() const { return query_ctx_.query_id; } diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc index 5310c6f3c..aace45fc5 100644 --- a/be/src/service/impala-hs2-server.cc +++ b/be/src/service/impala-hs2-server.cc @@ -631,8 +631,11 @@ void ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val, status = query_handle->WaitAsync(); if (!status.ok()) goto return_error; - // Optionally enable result caching on the ClientRequestState. - status = SetupResultsCacheing(query_handle, session, cache_num_rows); + // Check if query return result set and optionally enable result caching on the + // ClientRequestState. + bool returns_result_set; + status = + SetupResultsCacheing(query_handle, session, cache_num_rows, &returns_result_set); if (!status.ok()) goto return_error; // Once the query is running do a final check for session closure and add it to the @@ -641,7 +644,7 @@ void ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val, if (!status.ok()) goto return_error; return_val.__isset.operationHandle = true; return_val.operationHandle.__set_operationType(TOperationType::EXECUTE_STATEMENT); - return_val.operationHandle.__set_hasResultSet(query_handle->returns_result_set()); + return_val.operationHandle.__set_hasResultSet(returns_result_set); // Secret is inherited from session. TUniqueIdToTHandleIdentifier(query_handle->query_id(), secret, &return_val.operationHandle.operationId); @@ -657,13 +660,25 @@ void ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val, } Status ImpalaServer::SetupResultsCacheing(const QueryHandle& query_handle, - const shared_ptr<SessionState>& session, int64_t cache_num_rows) { - // Optionally enable result caching on the ClientRequestState. - if (cache_num_rows > 0) { + const shared_ptr<SessionState>& session, int64_t cache_num_rows, + bool* returns_result_set) { + QueryResultSet* result_set = nullptr; + { + // IMPALA-14359: Hold handle lock to observe members of query_handle. + // Need to investigate whether race against ClientRequestState::ExecDdlRequestImpl + // should be handled differently. + lock_guard<mutex> l(*query_handle->lock()); + *returns_result_set = query_handle->returns_result_set(); + + // Optionally enable result caching on the ClientRequestState. + if (cache_num_rows <= 0) return Status::OK(); const TResultSetMetadata* result_set_md = query_handle->result_metadata(); - QueryResultSet* result_set = - QueryResultSet::CreateHS2ResultSet(session->hs2_version, *result_set_md, nullptr, - query_handle->query_options().stringify_map_keys, 0); + result_set = QueryResultSet::CreateHS2ResultSet(session->hs2_version, *result_set_md, + nullptr, query_handle->query_options().stringify_map_keys, 0); + } + + // SetResultCache obtain CRS lock, so need to release it first. + if (result_set != nullptr) { RETURN_IF_ERROR(query_handle->SetResultCache(result_set, cache_num_rows)); } return Status::OK(); diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index df1802e05..120f6cb65 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -1093,8 +1093,8 @@ class ImpalaServer : public ImpalaServiceIf, /// dynamically scroll through the results, but also allow users to download a file /// containing all the results for a query. Status SetupResultsCacheing(const QueryHandle& query_handle, - const std::shared_ptr<SessionState>& session, - int64_t cache_num_rows) WARN_UNUSED_RESULT; + const std::shared_ptr<SessionState>& session, int64_t cache_num_rows, + bool* returns_result_set) WARN_UNUSED_RESULT; /// Helper functions to translate between HiveServer2 and Impala structs