This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new a8ae171bdf2 [enhancement](filecache) add more debug info via
VLOG_DEBUG (#58043) (#61194)
a8ae171bdf2 is described below
commit a8ae171bdf2689ecf0120dfaf6d3a761d347fae7
Author: zhengyu <[email protected]>
AuthorDate: Tue May 26 21:53:45 2026 +0800
[enhancement](filecache) add more debug info via VLOG_DEBUG (#58043)
(#61194)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Signed-off-by: zhengyu <[email protected]>
---
be/src/cloud/cloud_backend_service.cpp | 8 +++++++-
be/src/cloud/cloud_tablet.cpp | 3 +++
be/src/cloud/cloud_tablet_mgr.cpp | 3 +++
be/src/cloud/cloud_warm_up_manager.cpp | 2 ++
be/src/io/cache/block_file_cache.cpp | 37 +++++++++++++++++++++++++++-------
be/src/io/cache/block_file_cache.h | 3 ++-
be/src/io/fs/s3_file_reader.cpp | 4 ++++
7 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/be/src/cloud/cloud_backend_service.cpp
b/be/src/cloud/cloud_backend_service.cpp
index 4865a4ae06d..92c7a2b3871 100644
--- a/be/src/cloud/cloud_backend_service.cpp
+++ b/be/src/cloud/cloud_backend_service.cpp
@@ -32,6 +32,7 @@
#include "runtime/stream_load/stream_load_context.h"
#include "runtime/stream_load/stream_load_recorder.h"
#include "util/brpc_client_cache.h" // BrpcClientCache
+#include "util/stack_util.h"
#include "util/thrift_server.h"
namespace doris {
@@ -213,8 +214,13 @@ void
CloudBackendService::warm_up_cache_async(TWarmUpCacheAsyncResponse& respons
return;
}
PGetFileCacheMetaRequest brpc_request;
+ std::stringstream ss;
std::for_each(request.tablet_ids.cbegin(), request.tablet_ids.cend(),
- [&](int64_t tablet_id) {
brpc_request.add_tablet_ids(tablet_id); });
+ [&](int64_t tablet_id) {
+ brpc_request.add_tablet_ids(tablet_id);
+ ss << tablet_id << ",";
+ });
+ VLOG_DEBUG << "tablets set: " << ss.str() << " stack: " <<
get_stack_trace();
auto run_rpc = [this, brpc_stub,
brpc_addr](PGetFileCacheMetaRequest request_copy) ->
Status {
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index db620468fcb..f16e155c446 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -63,6 +63,7 @@
#include "olap/tablet_schema.h"
#include "olap/txn_manager.h"
#include "util/debug_points.h"
+#include "util/stack_util.h"
#include "vec/common/schema_util.h"
namespace doris {
@@ -421,6 +422,8 @@ void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr>
to_add, bool version_
return;
}
+ VLOG_DEBUG << "add_rowsets tablet_id=" << tablet_id() << " stack: " <<
get_stack_trace();
+
auto add_rowsets_directly = [=, this](std::vector<RowsetSharedPtr>&
rowsets) {
for (auto& rs : rowsets) {
if (warmup_delta_data) {
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp
b/be/src/cloud/cloud_tablet_mgr.cpp
index 7c1a1518fe6..bb186adc51d 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -29,6 +29,7 @@
#include "common/status.h"
#include "olap/lru_cache.h"
#include "runtime/memory/cache_policy.h"
+#include "util/stack_util.h"
namespace doris {
uint64_t g_tablet_report_inactive_duration_ms = 0;
@@ -176,6 +177,8 @@ Result<std::shared_ptr<CloudTablet>>
CloudTabletMgr::get_tablet(int64_t tablet_i
TabletMap& tablet_map;
};
+ VLOG_DEBUG << "get_tablet tablet_id=" << tablet_id << " stack: " <<
get_stack_trace();
+
auto tablet_id_str = std::to_string(tablet_id);
CacheKey key(tablet_id_str);
auto* handle = _cache->lookup(key);
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp
b/be/src/cloud/cloud_warm_up_manager.cpp
index b63af0592d2..62fd6735365 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -42,6 +42,7 @@
#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
#include "util/brpc_client_cache.h" // BrpcClientCache
+#include "util/stack_util.h"
#include "util/thrift_rpc_helper.h"
#include "util/time.h"
@@ -216,6 +217,7 @@ void CloudWarmUpManager::handle_jobs() {
std::make_shared<bthread::CountdownEvent>(0);
for (int64_t tablet_id : cur_job->tablet_ids) {
+ VLOG_DEBUG << "Warm up tablet " << tablet_id << " stack: " <<
get_stack_trace();
if (_cur_job_id == 0) { // The job is canceled
break;
}
diff --git a/be/src/io/cache/block_file_cache.cpp
b/be/src/io/cache/block_file_cache.cpp
index 50ad7b98370..620603ffed7 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -49,6 +49,7 @@
#include "io/cache/mem_file_cache_storage.h"
#include "util/concurrency_stats.h"
#include "util/runtime_profile.h"
+#include "util/stack_util.h"
#include "util/stopwatch.hpp"
#include "util/thread.h"
#include "util/time.h"
@@ -1016,6 +1017,9 @@ size_t BlockFileCache::try_release() {
std::lock_guard lc(cell->file_block->_mutex);
remove_size += file_block->range().size();
remove(file_block, cache_lock, lc);
+ VLOG_DEBUG << "try_release " << _cache_base_path
+ << " hash=" << file_block->get_hash_value().to_string()
+ << " offset=" << file_block->offset();
}
*_evict_by_try_release << remove_size;
LOG(INFO) << "Released " << trash.size() << " blocks in file cache " <<
_cache_base_path;
@@ -1055,12 +1059,16 @@ const LRUQueue& BlockFileCache::get_queue(FileCacheType
type) const {
}
void BlockFileCache::remove_file_blocks(std::vector<FileBlockCell*>& to_evict,
- std::lock_guard<std::mutex>&
cache_lock, bool sync) {
+ std::lock_guard<std::mutex>&
cache_lock, bool sync,
+ std::string& reason) {
auto remove_file_block_if = [&](FileBlockCell* cell) {
FileBlockSPtr file_block = cell->file_block;
if (file_block) {
std::lock_guard block_lock(file_block->_mutex);
remove(file_block, cache_lock, block_lock, sync);
+ VLOG_DEBUG << "remove_file_blocks"
+ << " hash=" << file_block->get_hash_value().to_string()
+ << " offset=" << file_block->offset() << " reason=" <<
reason;
}
};
std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if);
@@ -1323,6 +1331,7 @@ bool BlockFileCache::remove_if_ttl_file_blocks(const
UInt128Wrapper& file_key, b
// remove specific cache synchronously, for critical operations
// if in use, cache meta will be deleted after use and the block file is then
deleted asynchronously
void BlockFileCache::remove_if_cached(const UInt128Wrapper& file_key) {
+ std::string reason = "remove_if_cached";
SCOPED_CACHE_LOCK(_mutex, this);
bool is_ttl_file = remove_if_ttl_file_blocks(file_key, true, cache_lock,
true);
if (!is_ttl_file) {
@@ -1337,7 +1346,7 @@ void BlockFileCache::remove_if_cached(const
UInt128Wrapper& file_key) {
}
}
}
- remove_file_blocks(to_remove, cache_lock, true);
+ remove_file_blocks(to_remove, cache_lock, true, reason);
}
}
@@ -1345,6 +1354,7 @@ void BlockFileCache::remove_if_cached(const
UInt128Wrapper& file_key) {
// cache meta is deleted synchronously if not in use, and the block file is
deleted asynchronously
// if in use, cache meta will be deleted after use and the block file is then
deleted asynchronously
void BlockFileCache::remove_if_cached_async(const UInt128Wrapper& file_key) {
+ std::string reason = "remove_if_cached_async";
SCOPED_CACHE_LOCK(_mutex, this);
bool is_ttl_file = remove_if_ttl_file_blocks(file_key, true, cache_lock,
/*sync*/ false);
if (!is_ttl_file) {
@@ -1361,7 +1371,7 @@ void BlockFileCache::remove_if_cached_async(const
UInt128Wrapper& file_key) {
}
}
}
- remove_file_blocks(to_remove, cache_lock, false);
+ remove_file_blocks(to_remove, cache_lock, false, reason);
}
}
@@ -1458,7 +1468,9 @@ bool
BlockFileCache::try_reserve_from_other_queue_by_time_interval(
*(_evict_by_time_metrics_matrix[cache_type][cur_type]) <<
remove_size_per_type;
}
bool is_sync_removal = !evict_in_advance;
- remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+ std::string reason = std::string("try_reserve_by_time ") +
+ " evict_in_advance=" + (evict_in_advance ? "true" :
"false");
+ remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
return !is_overflow(removed_size, size, cur_cache_size, evict_in_advance);
}
@@ -1501,7 +1513,9 @@ bool BlockFileCache::try_reserve_from_other_queue_by_size(
*(_evict_by_size_metrics_matrix[cache_type][cur_type]) <<
cur_removed_size;
}
bool is_sync_removal = !evict_in_advance;
- remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+ std::string reason = std::string("try_reserve_by_size") +
+ " evict_in_advance=" + (evict_in_advance ? "true" :
"false");
+ remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
return !is_overflow(removed_size, size, cur_cache_size, evict_in_advance);
}
@@ -1548,7 +1562,10 @@ bool BlockFileCache::try_reserve_for_lru(const
UInt128Wrapper& hash,
find_evict_candidates(queue, size, cur_cache_size, removed_size,
to_evict, cache_lock,
cur_removed_size, evict_in_advance);
bool is_sync_removal = !evict_in_advance;
- remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+ std::string reason = std::string("try_reserve for cache type ") +
+ cache_type_to_string(context.cache_type) +
+ " evict_in_advance=" + (evict_in_advance ? "true"
: "false");
+ remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
*(_evict_by_self_lru_metrics_matrix[context.cache_type]) <<
cur_removed_size;
if (is_overflow(removed_size, size, cur_cache_size, evict_in_advance))
{
@@ -1583,6 +1600,11 @@ void BlockFileCache::remove(FileBlockSPtr file_block, T&
cache_lock, U& block_lo
*_queue_evict_size_metrics[static_cast<int>(file_block->cache_type())]
<< file_block->range().size();
*_total_evict_size_metrics << file_block->range().size();
+
+ VLOG_DEBUG << "Removing file block from cache. hash: " << hash.to_string()
+ << ", offset: " << offset << ", size: " <<
file_block->range().size()
+ << ", type: " << cache_type_to_string(type);
+
if (file_block->state_unlock(block_lock) == FileBlock::State::DOWNLOADED) {
FileCacheKey key;
key.hash = hash;
@@ -2379,7 +2401,8 @@ bool BlockFileCache::try_reserve_during_async_load(size_t
size,
if (index_queue_size != 0) {
collect_eliminate_fragments(get_queue(FileCacheType::INDEX));
}
- remove_file_blocks(to_evict, cache_lock, true);
+ std::string reason = "async load";
+ remove_file_blocks(to_evict, cache_lock, true, reason);
return !_disk_resource_limit_mode || removed_size >= size;
}
diff --git a/be/src/io/cache/block_file_cache.h
b/be/src/io/cache/block_file_cache.h
index f73e593c57e..742ec7aaae4 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -478,7 +478,8 @@ private:
bool is_overflow(size_t removed_size, size_t need_size, size_t
cur_cache_size,
bool evict_in_advance) const;
- void remove_file_blocks(std::vector<FileBlockCell*>&,
std::lock_guard<std::mutex>&, bool sync);
+ void remove_file_blocks(std::vector<FileBlockCell*>&,
std::lock_guard<std::mutex>&, bool sync,
+ std::string& reason);
void remove_file_blocks_and_clean_time_maps(std::vector<FileBlockCell*>&,
std::lock_guard<std::mutex>&);
diff --git a/be/src/io/fs/s3_file_reader.cpp b/be/src/io/fs/s3_file_reader.cpp
index 091768532b5..a20d448fa6d 100644
--- a/be/src/io/fs/s3_file_reader.cpp
+++ b/be/src/io/fs/s3_file_reader.cpp
@@ -32,6 +32,7 @@
#include <utility>
#include "common/compiler_util.h" // IWYU pragma: keep
+#include "io/cache/block_file_cache.h"
#include "io/fs/err_utils.h"
#include "io/fs/obj_storage_client.h"
#include "io/fs/s3_common.h"
@@ -116,6 +117,9 @@ Status S3FileReader::read_at_impl(size_t offset, Slice
result, size_t* bytes_rea
size_t bytes_req = result.size;
char* to = result.data;
bytes_req = std::min(bytes_req, _file_size - offset);
+ VLOG_DEBUG << fmt::format("S3FileReader::read_at_impl offset={} size={}
path={} hash={}",
+ offset, result.size, _path.native(),
+
io::BlockFileCache::hash(_path.native()).to_string());
VLOG_DEBUG << "enter s3 read_at_impl, off=" << offset << " n=" << bytes_req
<< " req=" << result.size << " file size=" << _file_size;
if (UNLIKELY(bytes_req == 0)) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]