This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 454e93a43cf branch-3.1: [Fix](warmup) Fix
`CloudTablet::complete_rowset_segment_warmup` coredump (#55932)
454e93a43cf is described below
commit 454e93a43cf872eb58b841d69565cf7842f1d926
Author: bobhan1 <[email protected]>
AuthorDate: Sun Sep 14 00:13:40 2025 +0800
branch-3.1: [Fix](warmup) Fix `CloudTablet::complete_rowset_segment_warmup`
coredump (#55932)
### What problem does this PR solve?
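Fix the coredump introduced in #54284.
The segment `download_done` callback is invoked later from a file cache
downloader thread (see the stack trace below), but it used a by-reference
default capture (`[&]`). It could therefore run after the locals of
`warm_up_rowset` it referred to had gone out of scope, which is the likely
cause of the crash in `CloudTablet::complete_rowset_segment_warmup`.
The callbacks now capture everything they need by value (`[=]`), including a
copy of `rowset_id`.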
```
*** Query id: 0-0 ***
*** is nereids: 0 ***
*** tablet id: 0 ***
*** Aborted at 1757323304 (unix time) try "date -d @1757323304" if you are
using GNU date ***
*** Current BE git commitID: e2bdf54ae16 ***
*** SIGFPE integer divide by zero (@0x562de1b7030d) received by PID 5468
(TID 5727 OR 0x7f480ebd7640) from PID 18446744073201451789; stack trace: ***
*** stack smashing detected ***: terminated
*** stack smashing detected ***: terminated
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:420
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
3# 0x00007F493CE5F520 in /lib/x86_64-linux-gnu/libc.so.6
4# std::_Hashtable<doris::RowsetId, std::pair<doris::RowsetId const,
doris::CloudTablet::RowsetWarmUpInfo>, std::allocator<std::pair<doris::RowsetId
const, doris::CloudTablet::RowsetWarmUpInfo> >, std::__detail::_Select1st,
std::equal_to<doris::RowsetId>, std::hash<doris::RowsetId>,
std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash,
std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true,
false, true> >::_M_locate(doris::RowsetId const&) const [...]
5# doris::CloudTablet::complete_rowset_segment_warmup(doris::RowsetId,
doris::Status) at
/home/zcp/repo_center/doris_master/doris/be/src/cloud/cloud_tablet.cpp:1630
6# std::_Function_handler<void (doris::Status),
doris::CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController*,
doris::PWarmUpRowsetRequest const*, doris::PWarmUpRowsetResponse*,
google::protobuf::Closure*)::$_0>::_M_invoke(std::_Any_data const&,
doris::Status&&) at
/usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292
7#
doris::io::FileCacheBlockDownloader::download_segment_file(doris::io::DownloadFileMeta
const&) at
/home/zcp/repo_center/doris_master/doris/be/src/io/cache/block_file_cache_downloader.cpp:297
8# doris::ThreadPool::dispatch_thread() at
/home/zcp/repo_center/doris_master/doris/be/src/util/threadpool.cpp:621
9# doris::Thread::supervise_thread(void*) at
/home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:461
10# start_thread at ./nptl/pthread_create.c:442
11# 0x00007F493CF43850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
```
---
be/src/cloud/cloud_internal_service.cpp | 36 ++++++++++++++++-----------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/be/src/cloud/cloud_internal_service.cpp
b/be/src/cloud/cloud_internal_service.cpp
index 08690bbbc8b..22fa9e1d4bd 100644
--- a/be/src/cloud/cloud_internal_service.cpp
+++ b/be/src/cloud/cloud_internal_service.cpp
@@ -186,6 +186,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
continue;
}
int64_t tablet_id = rs_meta.tablet_id();
+ auto rowset_id = rs_meta.rowset_id();
bool local_only = !(request->has_skip_existence_check() &&
request->skip_existence_check());
auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data =
*/ false,
/* sync_delete_bitmap = */
true,
@@ -212,7 +213,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
LOG(INFO) << "warm up rowset (request to handle) took " <<
handle_ts - request_ts
<< " us, tablet_id: " << rs_meta.tablet_id()
- << ", rowset_id: " << rs_meta.rowset_id().to_string();
+ << ", rowset_id: " << rowset_id.to_string();
}
int64_t expiration_time =
tablet_meta->ttl_seconds() == 0 ||
rs_meta.newest_write_timestamp() <= 0
@@ -223,10 +224,8 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
}
for (int64_t segment_id = 0; segment_id < rs_meta.num_segments();
segment_id++) {
- auto download_done = [&, tablet_id = rs_meta.tablet_id(),
- rowset_id = rs_meta.rowset_id().to_string(),
- segment_size =
rs_meta.segment_file_size(segment_id),
- wait](Status st) {
+ auto segment_size = rs_meta.segment_file_size(segment_id);
+ auto download_done = [=](Status st) {
if (st.ok()) {
g_file_cache_event_driven_warm_up_finished_segment_num <<
1;
g_file_cache_event_driven_warm_up_finished_segment_size <<
segment_size;
@@ -240,20 +239,22 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
now_ts - request_ts >
config::warm_up_rowset_slow_log_ms * 1000) {
g_file_cache_warm_up_rowset_slow_count << 1;
LOG(INFO) << "warm up rowset took " << now_ts -
request_ts
- << " us, tablet_id: " << tablet_id << ",
rowset_id: " << rowset_id
+ << " us, tablet_id: " << tablet_id
+ << ", rowset_id: " << rowset_id.to_string()
<< ", segment_id: " << segment_id;
}
if (now_ts - handle_ts >
config::warm_up_rowset_slow_log_ms * 1000) {
g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
LOG(INFO) << "warm up rowset (handle to finish) took "
<< now_ts - handle_ts
- << " us, tablet_id: " << tablet_id << ",
rowset_id: " << rowset_id
+ << " us, tablet_id: " << tablet_id
+ << ", rowset_id: " << rowset_id.to_string()
<< ", segment_id: " << segment_id;
}
} else {
g_file_cache_event_driven_warm_up_failed_segment_num << 1;
g_file_cache_event_driven_warm_up_failed_segment_size <<
segment_size;
LOG(WARNING) << "download segment failed, tablet_id: " <<
tablet_id
- << " rowset_id: " << rowset_id << ", error: "
<< st;
+ << " rowset_id: " << rowset_id.to_string() <<
", error: " << st;
}
if (wait) {
wait->signal();
@@ -262,9 +263,9 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
io::DownloadFileMeta download_meta {
.path =
storage_resource.value()->remote_segment_path(rs_meta, segment_id),
- .file_size = rs_meta.segment_file_size(segment_id),
+ .file_size = segment_size,
.offset = 0,
- .download_size = rs_meta.segment_file_size(segment_id),
+ .download_size = segment_size,
.file_system = storage_resource.value()->fs,
.ctx =
{
@@ -276,8 +277,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
.download_done = std::move(download_done),
};
g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
- g_file_cache_event_driven_warm_up_submitted_segment_size
- << rs_meta.segment_file_size(segment_id);
+ g_file_cache_event_driven_warm_up_submitted_segment_size <<
segment_size;
if (wait) {
wait->add_count();
}
@@ -285,8 +285,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
auto download_inverted_index = [&](std::string index_path,
uint64_t idx_size) {
auto storage_resource = rs_meta.remote_storage_resource();
- auto download_done = [=, tablet_id = rs_meta.tablet_id(),
- rowset_id =
rs_meta.rowset_id().to_string()](Status st) {
+ auto download_done = [=](Status st) {
if (st.ok()) {
g_file_cache_event_driven_warm_up_finished_index_num
<< 1;
g_file_cache_event_driven_warm_up_finished_index_size
<< idx_size;
@@ -303,21 +302,22 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
g_file_cache_warm_up_rowset_slow_count << 1;
LOG(INFO) << "warm up rowset took " << now_ts -
request_ts
<< " us, tablet_id: " << tablet_id
- << ", rowset_id: " << rowset_id
+ << ", rowset_id: " <<
rowset_id.to_string()
<< ", segment_id: " << segment_id;
}
if (now_ts - handle_ts >
config::warm_up_rowset_slow_log_ms * 1000) {
g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
LOG(INFO) << "warm up rowset (handle to finish)
took "
<< now_ts - handle_ts << " us,
tablet_id: " << tablet_id
- << ", rowset_id: " << rowset_id
+ << ", rowset_id: " <<
rowset_id.to_string()
<< ", segment_id: " << segment_id;
}
} else {
g_file_cache_event_driven_warm_up_failed_index_num <<
1;
g_file_cache_event_driven_warm_up_failed_index_size <<
idx_size;
- LOG(WARNING) << "download inverted index failed,
tablet_id: " << tablet_id
- << " rowset_id: " << rowset_id << ",
error: " << st;
+ LOG(WARNING)
+ << "download inverted index failed, tablet_id:
" << tablet_id
+ << " rowset_id: " << rowset_id.to_string() <<
", error: " << st;
}
if (wait) {
wait->signal();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]