This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fd9de7a2e16e72f244b9bfa15a71efe16a5c01ac
Author: Riza Suminto <riza.sumi...@cloudera.com>
AuthorDate: Sat Aug 23 17:10:32 2025 -0700

    IMPALA-14348: Fix TSAN issue after IMPALA-14327
    
    IMPALA-14327 reveal TSAN issue in impala-hs2-server.cc.
    ClientRequestState::returns_result_set() was accessed without holding
    ClientRequestState::lock_ in impala-hs2-server.cc.
    
    This patch fix the issue by obtaining ClientRequestState::lock_ first
    before accessing ClientRequestState::returns_result_set() and
    ClientRequestState::result_metadata(). Both accesses are done inside
    ImpalaServer::SetupResultsCacheing so lock_ only need to be obtained
    once. Filed IMPALA-14359 to follow up investigation.
    
    Testing:
    Run and pass TSAN build.
    
    Change-Id: I41fc25cea5b4ef7b4b9daac54b8665fa76ceb1cd
    Reviewed-on: http://gerrit.cloudera.org:8080/23343
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 be/src/runtime/query-driver.cc        |  5 +++--
 be/src/service/client-request-state.h |  2 +-
 be/src/service/impala-hs2-server.cc   | 33 ++++++++++++++++++++++++---------
 be/src/service/impala-server.h        |  4 ++--
 4 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/be/src/runtime/query-driver.cc b/be/src/runtime/query-driver.cc
index 6fb584cc5..05ec95571 100644
--- a/be/src/runtime/query-driver.cc
+++ b/be/src/runtime/query-driver.cc
@@ -366,8 +366,9 @@ void QueryDriver::RetryQueryFromThread(
   if (request_state->IsResultCacheingEnabled()) {
     status = DebugAction(FLAGS_debug_actions, "QUERY_RETRY_SET_RESULT_CACHE");
     if (status.ok()) {
-      status = parent_server_->SetupResultsCacheing(
-          retry_query_handle, session, request_state->result_cache_max_size());
+      bool returns_result_set;
+      status = parent_server_->SetupResultsCacheing(retry_query_handle, 
session,
+          request_state->result_cache_max_size(), &returns_result_set);
     }
     if (!status.ok()) {
       string error_msg = Substitute(
diff --git a/be/src/service/client-request-state.h 
b/be/src/service/client-request-state.h
index eb65da95d..7ffdb374f 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -285,7 +285,7 @@ class ClientRequestState {
   int num_rows_fetched() const { return num_rows_fetched_; }
   void set_fetched_rows() { fetched_rows_ = true; }
   bool fetched_rows() const { return fetched_rows_; }
-  bool returns_result_set() { return !result_metadata_.columns.empty(); }
+  bool returns_result_set() const { return !result_metadata_.columns.empty(); }
   const TResultSetMetadata* result_metadata() const { return 
&result_metadata_; }
   const TUniqueId& query_id() const { return query_ctx_.query_id; }
 
diff --git a/be/src/service/impala-hs2-server.cc 
b/be/src/service/impala-hs2-server.cc
index 5310c6f3c..aace45fc5 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -631,8 +631,11 @@ void 
ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val,
   status = query_handle->WaitAsync();
   if (!status.ok()) goto return_error;
 
-  // Optionally enable result caching on the ClientRequestState.
-  status = SetupResultsCacheing(query_handle, session, cache_num_rows);
+  // Check if query return result set and optionally enable result caching on 
the
+  // ClientRequestState.
+  bool returns_result_set;
+  status =
+      SetupResultsCacheing(query_handle, session, cache_num_rows, 
&returns_result_set);
   if (!status.ok()) goto return_error;
 
   // Once the query is running do a final check for session closure and add it 
to the
@@ -641,7 +644,7 @@ void 
ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val,
   if (!status.ok()) goto return_error;
   return_val.__isset.operationHandle = true;
   
return_val.operationHandle.__set_operationType(TOperationType::EXECUTE_STATEMENT);
-  
return_val.operationHandle.__set_hasResultSet(query_handle->returns_result_set());
+  return_val.operationHandle.__set_hasResultSet(returns_result_set);
   // Secret is inherited from session.
   TUniqueIdToTHandleIdentifier(query_handle->query_id(), secret,
                                &return_val.operationHandle.operationId);
@@ -657,13 +660,25 @@ void 
ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val,
 }
 
 Status ImpalaServer::SetupResultsCacheing(const QueryHandle& query_handle,
-    const shared_ptr<SessionState>& session, int64_t cache_num_rows) {
-  // Optionally enable result caching on the ClientRequestState.
-  if (cache_num_rows > 0) {
+    const shared_ptr<SessionState>& session, int64_t cache_num_rows,
+    bool* returns_result_set) {
+  QueryResultSet* result_set = nullptr;
+  {
+    // IMPALA-14359: Hold handle lock to observe members of query_handle.
+    // Need to investigate whether race against 
ClientRequestState::ExecDdlRequestImpl
+    // should be handled differently.
+    lock_guard<mutex> l(*query_handle->lock());
+    *returns_result_set = query_handle->returns_result_set();
+
+    // Optionally enable result caching on the ClientRequestState.
+    if (cache_num_rows <= 0) return Status::OK();
     const TResultSetMetadata* result_set_md = query_handle->result_metadata();
-    QueryResultSet* result_set =
-        QueryResultSet::CreateHS2ResultSet(session->hs2_version, 
*result_set_md, nullptr,
-            query_handle->query_options().stringify_map_keys, 0);
+    result_set = QueryResultSet::CreateHS2ResultSet(session->hs2_version, 
*result_set_md,
+        nullptr, query_handle->query_options().stringify_map_keys, 0);
+  }
+
+  // SetResultCache obtain CRS lock, so need to release it first.
+  if (result_set != nullptr) {
     RETURN_IF_ERROR(query_handle->SetResultCache(result_set, cache_num_rows));
   }
   return Status::OK();
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index df1802e05..120f6cb65 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -1093,8 +1093,8 @@ class ImpalaServer : public ImpalaServiceIf,
   /// dynamically scroll through the results, but also allow users to download 
a file
   /// containing all the results for a query.
   Status SetupResultsCacheing(const QueryHandle& query_handle,
-      const std::shared_ptr<SessionState>& session,
-      int64_t cache_num_rows) WARN_UNUSED_RESULT;
+      const std::shared_ptr<SessionState>& session, int64_t cache_num_rows,
+      bool* returns_result_set) WARN_UNUSED_RESULT;
 
   /// Helper functions to translate between HiveServer2 and Impala structs
 

Reply via email to