This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f3b50b3472 [enhance](cooldown) skip once failed follow cooldown tablet 
(#16810)
f3b50b3472 is described below

commit f3b50b3472189a5a7f74f573135636b62898eb72
Author: AlexYue <[email protected]>
AuthorDate: Wed Mar 8 14:14:13 2023 +0800

    [enhance](cooldown) skip once failed follow cooldown tablet (#16810)
---
 be/src/olap/olap_server.cpp    | 13 ++++++++++---
 be/src/olap/tablet.cpp         |  8 ++++++--
 be/src/olap/tablet.h           |  3 +++
 be/src/olap/tablet_manager.cpp | 27 +++++++++++++--------------
 4 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 97cdbcf437..7a61cd3287 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -712,15 +712,22 @@ Status 
StorageEngine::submit_seg_compaction_task(BetaRowsetWriter* writer,
 
 void StorageEngine::_cooldown_tasks_producer_callback() {
     int64_t interval = config::generate_cooldown_task_interval_sec;
+    // the cooldown replica may be slow to upload its meta file, so we should 
wait
+    // until it has finished uploading
+    int64_t skip_failed_interval = interval * 10;
     do {
         // these tables are ordered by priority desc
         std::vector<TabletSharedPtr> tablets;
         // TODO(luwei) : a more efficient way to get cooldown tablets
+        auto cur_time = time(nullptr);
         // we should skip all the tablets which are not running and those 
pending to do cooldown
-        auto skip_tablet = [this](const TabletSharedPtr& tablet) -> bool {
+        // also skip tablets that recently failed to follow cooldown
+        auto skip_tablet = [this, skip_failed_interval,
+                            cur_time](const TabletSharedPtr& tablet) -> bool {
             std::lock_guard<std::mutex> lock(_running_cooldown_mutex);
-            return TABLET_RUNNING != tablet->tablet_state() ||
-                   _running_cooldown_tablets.find(tablet->tablet_id()) ==
+            return cur_time - tablet->last_failed_follow_cooldown_time() < 
skip_failed_interval ||
+                   TABLET_RUNNING != tablet->tablet_state() ||
+                   _running_cooldown_tablets.find(tablet->tablet_id()) !=
                            _running_cooldown_tablets.end();
         };
         _tablet_manager->get_cooldown_tablets(&tablets, 
std::move(skip_tablet));
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index b1f25b67ab..2c8449ee91 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1744,8 +1744,12 @@ Status Tablet::cooldown() {
         // this replica is cooldown replica
         RETURN_IF_ERROR(_cooldown_data());
     } else {
-        // try to follow cooldowned data from cooldown replica
-        RETURN_IF_ERROR(_follow_cooldowned_data());
+        Status st = _follow_cooldowned_data();
+        if (UNLIKELY(!st.ok())) {
+            _last_failed_follow_cooldown_time = time(nullptr);
+            return st;
+        }
+        _last_failed_follow_cooldown_time = 0;
     }
     return Status::OK();
 }
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index e22bebf890..5ff1be8fdd 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -310,6 +310,8 @@ public:
     
////////////////////////////////////////////////////////////////////////////
     // begin cooldown functions
     
////////////////////////////////////////////////////////////////////////////
+    int64_t last_failed_follow_cooldown_time() const { return 
_last_failed_follow_cooldown_time; }
+
     // Cooldown to remote fs.
     Status cooldown();
 
@@ -564,6 +566,7 @@ private:
     // `_cold_compaction_lock` is used to serialize cold data compaction and 
all operations that
     // may delete compaction input rowsets.
     std::mutex _cold_compaction_lock;
+    int64_t _last_failed_follow_cooldown_time = 0;
 
     DISALLOW_COPY_AND_ASSIGN(Tablet);
 
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 9758356f59..f7e1ca25a5 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -1296,20 +1296,19 @@ void 
TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets,
                 tablets_shard.tablet_map.begin(), 
tablets_shard.tablet_map.end(),
                 [&candidates](auto& tablet_pair) { 
candidates.emplace_back(tablet_pair.second); });
     }
-    std::for_each(
-            candidates.begin(), candidates.end(),
-            [&sort_ctx_vec, &skip_tablet](std::weak_ptr<Tablet>& t) {
-                const TabletSharedPtr& tablet = t.lock();
-                if (UNLIKELY(nullptr == tablet)) {
-                    return;
-                }
-                std::shared_lock rdlock(tablet->get_header_lock());
-                int64_t cooldown_timestamp = -1;
-                size_t file_size = -1;
-                if (skip_tablet(tablet) && 
tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
-                    sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, 
file_size);
-                }
-            });
+    auto get_cooldown_tablet = [&sort_ctx_vec, 
&skip_tablet](std::weak_ptr<Tablet>& t) {
+        const TabletSharedPtr& tablet = t.lock();
+        if (UNLIKELY(nullptr == tablet)) {
+            return;
+        }
+        std::shared_lock rdlock(tablet->get_header_lock());
+        int64_t cooldown_timestamp = -1;
+        size_t file_size = -1;
+        if (!skip_tablet(tablet) && tablet->need_cooldown(&cooldown_timestamp, 
&file_size)) {
+            sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size);
+        }
+    };
+    std::for_each(candidates.begin(), candidates.end(), get_cooldown_tablet);
 
     std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to