This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new bda6d12cd5d branch-3.1: [fix](warmup) avoid calling recycle_cache after rebalance #53339 (#53338)
bda6d12cd5d is described below
commit bda6d12cd5d187aaa8962f6094b0e0edc21ca62d
Author: Kaijie Chen <[email protected]>
AuthorDate: Thu Jul 24 14:35:28 2025 +0800
branch-3.1: [fix](warmup) avoid calling recycle_cache after rebalance #53339 (#53338)
backport #53339
---
be/src/cloud/cloud_tablet.cpp | 42 ++++++++++++++++------------------
be/src/cloud/cloud_tablet.h | 9 +++++++-
be/src/cloud/cloud_warm_up_manager.cpp | 26 ++++++++-------------
be/src/cloud/cloud_warm_up_manager.h | 5 ++--
4 files changed, 40 insertions(+), 42 deletions(-)
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index 6132270c4fd..d683d94afe7 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -494,7 +494,11 @@ uint64_t CloudTablet::delete_expired_stale_rowsets() {
}
_reconstruct_version_tracker_if_necessary();
}
- recycle_cached_data(expired_rowsets);
+ auto recycled_rowsets = recycle_cached_data(expired_rowsets);
+ if (!recycled_rowsets.empty()) {
+ auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+ manager.recycle_cache(tablet_id(), recycled_rowsets);
+ }
if (config::enable_mow_verbose_log) {
LOG_INFO("finish delete_expired_stale_rowset for tablet={}",
tablet_id());
}
@@ -564,15 +568,11 @@ void CloudTablet::remove_unused_rowsets() {
}
{
- std::vector<RowsetId> rowset_ids;
- std::vector<int64_t> num_segments;
- std::vector<std::vector<std::string>> index_file_names;
+ std::vector<RecycledRowsets> recycled_rowsets;
for (auto& rs : removed_rowsets) {
- rowset_ids.push_back(rs->rowset_id());
- num_segments.push_back(rs->num_segments());
auto index_names = rs->get_index_file_names();
- index_file_names.push_back(index_names);
+ recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(),
index_names);
int64_t segment_size_sum = 0;
for (int32_t i = 0; i < rs->num_segments(); i++) {
segment_size_sum += rs->rowset_meta()->segment_file_size(i);
@@ -582,10 +582,10 @@ void CloudTablet::remove_unused_rowsets() {
g_file_cache_recycle_cached_data_index_num << index_names.size();
}
- if (removed_rowsets.size() > 0) {
+ if (recycled_rowsets.size() > 0) {
auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
- manager.recycle_cache(tablet_id(), rowset_ids, num_segments,
index_file_names);
+ manager.recycle_cache(tablet_id(), recycled_rowsets);
}
}
@@ -626,14 +626,17 @@ void CloudTablet::update_base_size(const Rowset& rs) {
}
void CloudTablet::clear_cache() {
- CloudTablet::recycle_cached_data(get_snapshot_rowset(true));
+ auto recycled_rowsets =
CloudTablet::recycle_cached_data(get_snapshot_rowset(true));
+ if (!recycled_rowsets.empty()) {
+ auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+ manager.recycle_cache(tablet_id(), recycled_rowsets);
+ }
_engine.tablet_mgr().erase_tablet(tablet_id());
}
-void CloudTablet::recycle_cached_data(const std::vector<RowsetSharedPtr>&
rowsets) {
- std::vector<RowsetId> rowset_ids;
- std::vector<int64_t> num_segments;
- std::vector<std::vector<std::string>> index_file_names;
+std::vector<RecycledRowsets> CloudTablet::recycle_cached_data(
+ const std::vector<RowsetSharedPtr>& rowsets) {
+ std::vector<RecycledRowsets> recycled_rowsets;
for (const auto& rs : rowsets) {
// rowsets and tablet._rs_version_map each hold a rowset shared_ptr,
so at this point, the reference count of the shared_ptr is at least 2.
if (rs.use_count() > 2) {
@@ -642,10 +645,9 @@ void CloudTablet::recycle_cached_data(const
std::vector<RowsetSharedPtr>& rowset
continue;
}
rs->clear_cache();
- rowset_ids.push_back(rs->rowset_id());
- num_segments.push_back(rs->num_segments());
auto index_names = rs->get_index_file_names();
- index_file_names.push_back(index_names);
+ recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(),
index_names);
+
int64_t segment_size_sum = 0;
for (int32_t i = 0; i < rs->num_segments(); i++) {
segment_size_sum += rs->rowset_meta()->segment_file_size(i);
@@ -654,11 +656,7 @@ void CloudTablet::recycle_cached_data(const
std::vector<RowsetSharedPtr>& rowset
g_file_cache_recycle_cached_data_segment_size << segment_size_sum;
g_file_cache_recycle_cached_data_index_num << index_names.size();
}
- if (!rowsets.empty()) {
- auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
- manager.recycle_cache(rowsets.front()->rowset_meta()->tablet_id(),
rowset_ids, num_segments,
- index_file_names);
- }
+ return recycled_rowsets;
}
void CloudTablet::reset_approximate_stats(int64_t num_rowsets, int64_t
num_segments,
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index 218b1a13b73..7fc52438ca3 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -41,6 +41,12 @@ struct SyncRowsetStats {
int64_t tablet_meta_cache_miss {0};
};
+struct RecycledRowsets {
+ RowsetId rowset_id;
+ int64_t num_segments;
+ std::vector<std::string> index_file_names;
+};
+
class CloudTablet final : public BaseTablet {
public:
CloudTablet(CloudStorageEngine& engine, TabletMetaSharedPtr tablet_meta);
@@ -270,7 +276,8 @@ public:
void add_unused_rowsets(const std::vector<RowsetSharedPtr>& rowsets);
void remove_unused_rowsets();
- static void recycle_cached_data(const std::vector<RowsetSharedPtr>&
rowsets);
+ static std::vector<RecycledRowsets> recycle_cached_data(
+ const std::vector<RowsetSharedPtr>& rowsets);
private:
// FIXME(plat1ko): No need to record base size if rowsets are ordered by
version
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp
b/be/src/cloud/cloud_warm_up_manager.cpp
index 60f9beae547..c0d1462cbc3 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -567,36 +567,30 @@ void CloudWarmUpManager::warm_up_rowset(RowsetMeta&
rs_meta) {
}
}
-void CloudWarmUpManager::recycle_cache(
- int64_t tablet_id, const std::vector<RowsetId>& rowset_ids,
- const std::vector<int64_t>& num_segments,
- const std::vector<std::vector<std::string>>& index_file_names) {
- LOG(INFO) << "recycle_cache: tablet_id=" << tablet_id << ", num_rowsets="
<< rowset_ids.size();
+void CloudWarmUpManager::recycle_cache(int64_t tablet_id,
+ const std::vector<RecycledRowsets>&
rowsets) {
+ LOG(INFO) << "recycle_cache: tablet_id=" << tablet_id << ", num_rowsets="
<< rowsets.size();
auto replicas = get_replica_info(tablet_id);
if (replicas.empty()) {
return;
}
- if (rowset_ids.size() != num_segments.size()) {
- LOG(WARNING) << "recycle_cache: rowset_ids size mismatch with
num_segments";
- return;
- }
PRecycleCacheRequest request;
- for (int i = 0; i < rowset_ids.size(); i++) {
+ for (const auto& rowset : rowsets) {
RecycleCacheMeta* meta = request.add_cache_metas();
meta->set_tablet_id(tablet_id);
- meta->set_rowset_id(rowset_ids[i].to_string());
- meta->set_num_segments(num_segments[i]);
- for (const auto& name : index_file_names[i]) {
+ meta->set_rowset_id(rowset.rowset_id.to_string());
+ meta->set_num_segments(rowset.num_segments);
+ for (const auto& name : rowset.index_file_names) {
meta->add_index_file_names(name);
}
- g_file_cache_recycle_cache_requested_segment_num << num_segments[i];
- g_file_cache_recycle_cache_requested_index_num <<
index_file_names[i].size();
+ g_file_cache_recycle_cache_requested_segment_num <<
rowset.num_segments;
+ g_file_cache_recycle_cache_requested_index_num <<
rowset.index_file_names.size();
}
+ auto dns_cache = ExecEnv::GetInstance()->dns_cache();
for (auto& replica : replicas) {
// send sync request
std::string host = replica.host;
- auto dns_cache = ExecEnv::GetInstance()->dns_cache();
if (dns_cache == nullptr) {
LOG(WARNING) << "DNS cache is not initialized, skipping hostname
resolve";
} else if (!is_valid_ip(replica.host)) {
diff --git a/be/src/cloud/cloud_warm_up_manager.h
b/be/src/cloud/cloud_warm_up_manager.h
index 13ba906a4e5..6feef0e9d42 100644
--- a/be/src/cloud/cloud_warm_up_manager.h
+++ b/be/src/cloud/cloud_warm_up_manager.h
@@ -27,6 +27,7 @@
#include <vector>
#include "cloud/cloud_storage_engine.h"
+#include "cloud/cloud_tablet.h"
#include "common/status.h"
#include "gen_cpp/BackendService.h"
@@ -73,9 +74,7 @@ public:
void warm_up_rowset(RowsetMeta& rs_meta);
- void recycle_cache(int64_t tablet_id, const std::vector<RowsetId>&
rowset_ids,
- const std::vector<int64_t>& num_segments,
- const std::vector<std::vector<std::string>>&
index_file_names);
+ void recycle_cache(int64_t tablet_id, const std::vector<RecycledRowsets>&
rowsets);
private:
void handle_jobs();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]