This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 454e93a43cf branch-3.1: [Fix](warmup) Fix `CloudTablet::complete_rowset_segment_warmup` coredump (#55932)
454e93a43cf is described below

commit 454e93a43cf872eb58b841d69565cf7842f1d926
Author: bobhan1 <[email protected]>
AuthorDate: Sun Sep 14 00:13:40 2025 +0800

    branch-3.1: [Fix](warmup) Fix `CloudTablet::complete_rowset_segment_warmup` coredump (#55932)
    
    ### What problem does this PR solve?
    
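    Fix a coredump introduced in #54284. The segment `download_done` callback in
    `CloudInternalServiceImpl::warm_up_rowset` used a by-reference default
    capture (`[&, ...]`), but it is invoked later from the file cache
    downloader's thread pool (see the stack trace below). It now captures by
    value (`[=]`).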
    ```
    *** Query id: 0-0 ***
    *** is nereids: 0 ***
    *** tablet id: 0 ***
    *** Aborted at 1757323304 (unix time) try "date -d @1757323304" if you are 
using GNU date ***
    *** Current BE git commitID: e2bdf54ae16 ***
    *** SIGFPE integer divide by zero (@0x562de1b7030d) received by PID 5468 
(TID 5727 OR 0x7f480ebd7640) from PID 18446744073201451789; stack trace: ***
    *** stack smashing detected ***: terminated
    *** stack smashing detected ***: terminated
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:420
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     3# 0x00007F493CE5F520 in /lib/x86_64-linux-gnu/libc.so.6
     4# std::_Hashtable<doris::RowsetId, std::pair<doris::RowsetId const, 
doris::CloudTablet::RowsetWarmUpInfo>, std::allocator<std::pair<doris::RowsetId 
const, doris::CloudTablet::RowsetWarmUpInfo> >, std::__detail::_Select1st, 
std::equal_to<doris::RowsetId>, std::hash<doris::RowsetId>, 
std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, 
std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, 
false, true> >::_M_locate(doris::RowsetId const&) const  [...]
     5# doris::CloudTablet::complete_rowset_segment_warmup(doris::RowsetId, 
doris::Status) at 
/home/zcp/repo_center/doris_master/doris/be/src/cloud/cloud_tablet.cpp:1630
     6# std::_Function_handler<void (doris::Status), 
doris::CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController*,
 doris::PWarmUpRowsetRequest const*, doris::PWarmUpRowsetResponse*, 
google::protobuf::Closure*)::$_0>::_M_invoke(std::_Any_data const&, 
doris::Status&&) at 
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292
     7# 
doris::io::FileCacheBlockDownloader::download_segment_file(doris::io::DownloadFileMeta
 const&) at 
/home/zcp/repo_center/doris_master/doris/be/src/io/cache/block_file_cache_downloader.cpp:297
     8# doris::ThreadPool::dispatch_thread() at 
/home/zcp/repo_center/doris_master/doris/be/src/util/threadpool.cpp:621
     9# doris::Thread::supervise_thread(void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:461
    10# start_thread at ./nptl/pthread_create.c:442
    11# 0x00007F493CF43850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    ```
---
 be/src/cloud/cloud_internal_service.cpp | 36 ++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/be/src/cloud/cloud_internal_service.cpp 
b/be/src/cloud/cloud_internal_service.cpp
index 08690bbbc8b..22fa9e1d4bd 100644
--- a/be/src/cloud/cloud_internal_service.cpp
+++ b/be/src/cloud/cloud_internal_service.cpp
@@ -186,6 +186,7 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
             continue;
         }
         int64_t tablet_id = rs_meta.tablet_id();
+        auto rowset_id = rs_meta.rowset_id();
         bool local_only = !(request->has_skip_existence_check() && 
request->skip_existence_check());
         auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data = 
*/ false,
                                                    /* sync_delete_bitmap = */ 
true,
@@ -212,7 +213,7 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
             g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
             LOG(INFO) << "warm up rowset (request to handle) took " << 
handle_ts - request_ts
                       << " us, tablet_id: " << rs_meta.tablet_id()
-                      << ", rowset_id: " << rs_meta.rowset_id().to_string();
+                      << ", rowset_id: " << rowset_id.to_string();
         }
         int64_t expiration_time =
                 tablet_meta->ttl_seconds() == 0 || 
rs_meta.newest_write_timestamp() <= 0
@@ -223,10 +224,8 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
         }
 
         for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); 
segment_id++) {
-            auto download_done = [&, tablet_id = rs_meta.tablet_id(),
-                                  rowset_id = rs_meta.rowset_id().to_string(),
-                                  segment_size = 
rs_meta.segment_file_size(segment_id),
-                                  wait](Status st) {
+            auto segment_size = rs_meta.segment_file_size(segment_id);
+            auto download_done = [=](Status st) {
                 if (st.ok()) {
                     g_file_cache_event_driven_warm_up_finished_segment_num << 
1;
                     g_file_cache_event_driven_warm_up_finished_segment_size << 
segment_size;
@@ -240,20 +239,22 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
                         now_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
                         g_file_cache_warm_up_rowset_slow_count << 1;
                         LOG(INFO) << "warm up rowset took " << now_ts - 
request_ts
-                                  << " us, tablet_id: " << tablet_id << ", 
rowset_id: " << rowset_id
+                                  << " us, tablet_id: " << tablet_id
+                                  << ", rowset_id: " << rowset_id.to_string()
                                   << ", segment_id: " << segment_id;
                     }
                     if (now_ts - handle_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
                         
g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
                         LOG(INFO) << "warm up rowset (handle to finish) took " 
<< now_ts - handle_ts
-                                  << " us, tablet_id: " << tablet_id << ", 
rowset_id: " << rowset_id
+                                  << " us, tablet_id: " << tablet_id
+                                  << ", rowset_id: " << rowset_id.to_string()
                                   << ", segment_id: " << segment_id;
                     }
                 } else {
                     g_file_cache_event_driven_warm_up_failed_segment_num << 1;
                     g_file_cache_event_driven_warm_up_failed_segment_size << 
segment_size;
                     LOG(WARNING) << "download segment failed, tablet_id: " << 
tablet_id
-                                 << " rowset_id: " << rowset_id << ", error: " 
<< st;
+                                 << " rowset_id: " << rowset_id.to_string() << 
", error: " << st;
                 }
                 if (wait) {
                     wait->signal();
@@ -262,9 +263,9 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
 
             io::DownloadFileMeta download_meta {
                     .path = 
storage_resource.value()->remote_segment_path(rs_meta, segment_id),
-                    .file_size = rs_meta.segment_file_size(segment_id),
+                    .file_size = segment_size,
                     .offset = 0,
-                    .download_size = rs_meta.segment_file_size(segment_id),
+                    .download_size = segment_size,
                     .file_system = storage_resource.value()->fs,
                     .ctx =
                             {
@@ -276,8 +277,7 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
                     .download_done = std::move(download_done),
             };
             g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
-            g_file_cache_event_driven_warm_up_submitted_segment_size
-                    << rs_meta.segment_file_size(segment_id);
+            g_file_cache_event_driven_warm_up_submitted_segment_size << 
segment_size;
             if (wait) {
                 wait->add_count();
             }
@@ -285,8 +285,7 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
 
             auto download_inverted_index = [&](std::string index_path, 
uint64_t idx_size) {
                 auto storage_resource = rs_meta.remote_storage_resource();
-                auto download_done = [=, tablet_id = rs_meta.tablet_id(),
-                                      rowset_id = 
rs_meta.rowset_id().to_string()](Status st) {
+                auto download_done = [=](Status st) {
                     if (st.ok()) {
                         g_file_cache_event_driven_warm_up_finished_index_num 
<< 1;
                         g_file_cache_event_driven_warm_up_finished_index_size 
<< idx_size;
@@ -303,21 +302,22 @@ void 
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
                             g_file_cache_warm_up_rowset_slow_count << 1;
                             LOG(INFO) << "warm up rowset took " << now_ts - 
request_ts
                                       << " us, tablet_id: " << tablet_id
-                                      << ", rowset_id: " << rowset_id
+                                      << ", rowset_id: " << 
rowset_id.to_string()
                                       << ", segment_id: " << segment_id;
                         }
                         if (now_ts - handle_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
                             
g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
                             LOG(INFO) << "warm up rowset (handle to finish) 
took "
                                       << now_ts - handle_ts << " us, 
tablet_id: " << tablet_id
-                                      << ", rowset_id: " << rowset_id
+                                      << ", rowset_id: " << 
rowset_id.to_string()
                                       << ", segment_id: " << segment_id;
                         }
                     } else {
                         g_file_cache_event_driven_warm_up_failed_index_num << 
1;
                         g_file_cache_event_driven_warm_up_failed_index_size << 
idx_size;
-                        LOG(WARNING) << "download inverted index failed, 
tablet_id: " << tablet_id
-                                     << " rowset_id: " << rowset_id << ", 
error: " << st;
+                        LOG(WARNING)
+                                << "download inverted index failed, tablet_id: 
" << tablet_id
+                                << " rowset_id: " << rowset_id.to_string() << 
", error: " << st;
                     }
                     if (wait) {
                         wait->signal();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to