This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new bda6d12cd5d branch-3.1: [fix](warmup) avoid calling recycle_cache after rebalance #53339 (#53338)
bda6d12cd5d is described below

commit bda6d12cd5d187aaa8962f6094b0e0edc21ca62d
Author: Kaijie Chen <[email protected]>
AuthorDate: Thu Jul 24 14:35:28 2025 +0800

    branch-3.1: [fix](warmup) avoid calling recycle_cache after rebalance #53339 (#53338)
    
    backport #53339
---
 be/src/cloud/cloud_tablet.cpp          | 42 ++++++++++++++++------------------
 be/src/cloud/cloud_tablet.h            |  9 +++++++-
 be/src/cloud/cloud_warm_up_manager.cpp | 26 ++++++++-------------
 be/src/cloud/cloud_warm_up_manager.h   |  5 ++--
 4 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index 6132270c4fd..d683d94afe7 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -494,7 +494,11 @@ uint64_t CloudTablet::delete_expired_stale_rowsets() {
         }
         _reconstruct_version_tracker_if_necessary();
     }
-    recycle_cached_data(expired_rowsets);
+    auto recycled_rowsets = recycle_cached_data(expired_rowsets);
+    if (!recycled_rowsets.empty()) {
+        auto& manager = 
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+        manager.recycle_cache(tablet_id(), recycled_rowsets);
+    }
     if (config::enable_mow_verbose_log) {
         LOG_INFO("finish delete_expired_stale_rowset for tablet={}", 
tablet_id());
     }
@@ -564,15 +568,11 @@ void CloudTablet::remove_unused_rowsets() {
     }
 
     {
-        std::vector<RowsetId> rowset_ids;
-        std::vector<int64_t> num_segments;
-        std::vector<std::vector<std::string>> index_file_names;
+        std::vector<RecycledRowsets> recycled_rowsets;
 
         for (auto& rs : removed_rowsets) {
-            rowset_ids.push_back(rs->rowset_id());
-            num_segments.push_back(rs->num_segments());
             auto index_names = rs->get_index_file_names();
-            index_file_names.push_back(index_names);
+            recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(), 
index_names);
             int64_t segment_size_sum = 0;
             for (int32_t i = 0; i < rs->num_segments(); i++) {
                 segment_size_sum += rs->rowset_meta()->segment_file_size(i);
@@ -582,10 +582,10 @@ void CloudTablet::remove_unused_rowsets() {
             g_file_cache_recycle_cached_data_index_num << index_names.size();
         }
 
-        if (removed_rowsets.size() > 0) {
+        if (recycled_rowsets.size() > 0) {
             auto& manager =
                     
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
-            manager.recycle_cache(tablet_id(), rowset_ids, num_segments, 
index_file_names);
+            manager.recycle_cache(tablet_id(), recycled_rowsets);
         }
     }
 
@@ -626,14 +626,17 @@ void CloudTablet::update_base_size(const Rowset& rs) {
 }
 
 void CloudTablet::clear_cache() {
-    CloudTablet::recycle_cached_data(get_snapshot_rowset(true));
+    auto recycled_rowsets = 
CloudTablet::recycle_cached_data(get_snapshot_rowset(true));
+    if (!recycled_rowsets.empty()) {
+        auto& manager = 
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+        manager.recycle_cache(tablet_id(), recycled_rowsets);
+    }
     _engine.tablet_mgr().erase_tablet(tablet_id());
 }
 
-void CloudTablet::recycle_cached_data(const std::vector<RowsetSharedPtr>& 
rowsets) {
-    std::vector<RowsetId> rowset_ids;
-    std::vector<int64_t> num_segments;
-    std::vector<std::vector<std::string>> index_file_names;
+std::vector<RecycledRowsets> CloudTablet::recycle_cached_data(
+        const std::vector<RowsetSharedPtr>& rowsets) {
+    std::vector<RecycledRowsets> recycled_rowsets;
     for (const auto& rs : rowsets) {
         // rowsets and tablet._rs_version_map each hold a rowset shared_ptr, 
so at this point, the reference count of the shared_ptr is at least 2.
         if (rs.use_count() > 2) {
@@ -642,10 +645,9 @@ void CloudTablet::recycle_cached_data(const std::vector<RowsetSharedPtr>& rowset
             continue;
         }
         rs->clear_cache();
-        rowset_ids.push_back(rs->rowset_id());
-        num_segments.push_back(rs->num_segments());
         auto index_names = rs->get_index_file_names();
-        index_file_names.push_back(index_names);
+        recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(), 
index_names);
+
         int64_t segment_size_sum = 0;
         for (int32_t i = 0; i < rs->num_segments(); i++) {
             segment_size_sum += rs->rowset_meta()->segment_file_size(i);
@@ -654,11 +656,7 @@ void CloudTablet::recycle_cached_data(const std::vector<RowsetSharedPtr>& rowset
         g_file_cache_recycle_cached_data_segment_size << segment_size_sum;
         g_file_cache_recycle_cached_data_index_num << index_names.size();
     }
-    if (!rowsets.empty()) {
-        auto& manager = 
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
-        manager.recycle_cache(rowsets.front()->rowset_meta()->tablet_id(), 
rowset_ids, num_segments,
-                              index_file_names);
-    }
+    return recycled_rowsets;
 }
 
 void CloudTablet::reset_approximate_stats(int64_t num_rowsets, int64_t 
num_segments,
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index 218b1a13b73..7fc52438ca3 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -41,6 +41,12 @@ struct SyncRowsetStats {
     int64_t tablet_meta_cache_miss {0};
 };
 
+struct RecycledRowsets {
+    RowsetId rowset_id;
+    int64_t num_segments;
+    std::vector<std::string> index_file_names;
+};
+
 class CloudTablet final : public BaseTablet {
 public:
     CloudTablet(CloudStorageEngine& engine, TabletMetaSharedPtr tablet_meta);
@@ -270,7 +276,8 @@ public:
     void add_unused_rowsets(const std::vector<RowsetSharedPtr>& rowsets);
     void remove_unused_rowsets();
 
-    static void recycle_cached_data(const std::vector<RowsetSharedPtr>& 
rowsets);
+    static std::vector<RecycledRowsets> recycle_cached_data(
+            const std::vector<RowsetSharedPtr>& rowsets);
 
 private:
     // FIXME(plat1ko): No need to record base size if rowsets are ordered by 
version
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp b/be/src/cloud/cloud_warm_up_manager.cpp
index 60f9beae547..c0d1462cbc3 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -567,36 +567,30 @@ void CloudWarmUpManager::warm_up_rowset(RowsetMeta& rs_meta) {
     }
 }
 
-void CloudWarmUpManager::recycle_cache(
-        int64_t tablet_id, const std::vector<RowsetId>& rowset_ids,
-        const std::vector<int64_t>& num_segments,
-        const std::vector<std::vector<std::string>>& index_file_names) {
-    LOG(INFO) << "recycle_cache: tablet_id=" << tablet_id << ", num_rowsets=" 
<< rowset_ids.size();
+void CloudWarmUpManager::recycle_cache(int64_t tablet_id,
+                                       const std::vector<RecycledRowsets>& 
rowsets) {
+    LOG(INFO) << "recycle_cache: tablet_id=" << tablet_id << ", num_rowsets=" 
<< rowsets.size();
     auto replicas = get_replica_info(tablet_id);
     if (replicas.empty()) {
         return;
     }
-    if (rowset_ids.size() != num_segments.size()) {
-        LOG(WARNING) << "recycle_cache: rowset_ids size mismatch with 
num_segments";
-        return;
-    }
 
     PRecycleCacheRequest request;
-    for (int i = 0; i < rowset_ids.size(); i++) {
+    for (const auto& rowset : rowsets) {
         RecycleCacheMeta* meta = request.add_cache_metas();
         meta->set_tablet_id(tablet_id);
-        meta->set_rowset_id(rowset_ids[i].to_string());
-        meta->set_num_segments(num_segments[i]);
-        for (const auto& name : index_file_names[i]) {
+        meta->set_rowset_id(rowset.rowset_id.to_string());
+        meta->set_num_segments(rowset.num_segments);
+        for (const auto& name : rowset.index_file_names) {
             meta->add_index_file_names(name);
         }
-        g_file_cache_recycle_cache_requested_segment_num << num_segments[i];
-        g_file_cache_recycle_cache_requested_index_num << 
index_file_names[i].size();
+        g_file_cache_recycle_cache_requested_segment_num << 
rowset.num_segments;
+        g_file_cache_recycle_cache_requested_index_num << 
rowset.index_file_names.size();
     }
+    auto dns_cache = ExecEnv::GetInstance()->dns_cache();
     for (auto& replica : replicas) {
         // send sync request
         std::string host = replica.host;
-        auto dns_cache = ExecEnv::GetInstance()->dns_cache();
         if (dns_cache == nullptr) {
             LOG(WARNING) << "DNS cache is not initialized, skipping hostname 
resolve";
         } else if (!is_valid_ip(replica.host)) {
diff --git a/be/src/cloud/cloud_warm_up_manager.h b/be/src/cloud/cloud_warm_up_manager.h
index 13ba906a4e5..6feef0e9d42 100644
--- a/be/src/cloud/cloud_warm_up_manager.h
+++ b/be/src/cloud/cloud_warm_up_manager.h
@@ -27,6 +27,7 @@
 #include <vector>
 
 #include "cloud/cloud_storage_engine.h"
+#include "cloud/cloud_tablet.h"
 #include "common/status.h"
 #include "gen_cpp/BackendService.h"
 
@@ -73,9 +74,7 @@ public:
 
     void warm_up_rowset(RowsetMeta& rs_meta);
 
-    void recycle_cache(int64_t tablet_id, const std::vector<RowsetId>& 
rowset_ids,
-                       const std::vector<int64_t>& num_segments,
-                       const std::vector<std::vector<std::string>>& 
index_file_names);
+    void recycle_cache(int64_t tablet_id, const std::vector<RecycledRowsets>& 
rowsets);
 
 private:
     void handle_jobs();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to