This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f3b50b3472 [enhance](cooldown) skip once failed follow cooldown tablet
(#16810)
f3b50b3472 is described below
commit f3b50b3472189a5a7f74f573135636b62898eb72
Author: AlexYue <[email protected]>
AuthorDate: Wed Mar 8 14:14:13 2023 +0800
[enhance](cooldown) skip once failed follow cooldown tablet (#16810)
---
be/src/olap/olap_server.cpp | 13 ++++++++++---
be/src/olap/tablet.cpp | 8 ++++++--
be/src/olap/tablet.h | 3 +++
be/src/olap/tablet_manager.cpp | 27 +++++++++++++--------------
4 files changed, 32 insertions(+), 19 deletions(-)
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 97cdbcf437..7a61cd3287 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -712,15 +712,22 @@ Status
StorageEngine::submit_seg_compaction_task(BetaRowsetWriter* writer,
void StorageEngine::_cooldown_tasks_producer_callback() {
int64_t interval = config::generate_cooldown_task_interval_sec;
+ // the cooldown replica may be slow to upload its meta file, so we should
wait
+ // until it has finished uploading
+ int64_t skip_failed_interval = interval * 10;
do {
// these tables are ordered by priority desc
std::vector<TabletSharedPtr> tablets;
// TODO(luwei) : a more efficient way to get cooldown tablets
+ auto cur_time = time(nullptr);
// we should skip all the tablets which are not running and those
pending to do cooldown
- auto skip_tablet = [this](const TabletSharedPtr& tablet) -> bool {
+ // also skip tablets that recently failed to follow cooldown
+ auto skip_tablet = [this, skip_failed_interval,
+ cur_time](const TabletSharedPtr& tablet) -> bool {
std::lock_guard<std::mutex> lock(_running_cooldown_mutex);
- return TABLET_RUNNING != tablet->tablet_state() ||
- _running_cooldown_tablets.find(tablet->tablet_id()) ==
+ return cur_time - tablet->last_failed_follow_cooldown_time() <
skip_failed_interval ||
+ TABLET_RUNNING != tablet->tablet_state() ||
+ _running_cooldown_tablets.find(tablet->tablet_id()) !=
_running_cooldown_tablets.end();
};
_tablet_manager->get_cooldown_tablets(&tablets,
std::move(skip_tablet));
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index b1f25b67ab..2c8449ee91 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1744,8 +1744,12 @@ Status Tablet::cooldown() {
// this replica is cooldown replica
RETURN_IF_ERROR(_cooldown_data());
} else {
- // try to follow cooldowned data from cooldown replica
- RETURN_IF_ERROR(_follow_cooldowned_data());
+ Status st = _follow_cooldowned_data();
+ if (UNLIKELY(!st.ok())) {
+ _last_failed_follow_cooldown_time = time(nullptr);
+ return st;
+ }
+ _last_failed_follow_cooldown_time = 0;
}
return Status::OK();
}
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index e22bebf890..5ff1be8fdd 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -310,6 +310,8 @@ public:
////////////////////////////////////////////////////////////////////////////
// begin cooldown functions
////////////////////////////////////////////////////////////////////////////
+ int64_t last_failed_follow_cooldown_time() const { return
_last_failed_follow_cooldown_time; }
+
// Cooldown to remote fs.
Status cooldown();
@@ -564,6 +566,7 @@ private:
// `_cold_compaction_lock` is used to serialize cold data compaction and
all operations that
// may delete compaction input rowsets.
std::mutex _cold_compaction_lock;
+ int64_t _last_failed_follow_cooldown_time = 0;
DISALLOW_COPY_AND_ASSIGN(Tablet);
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 9758356f59..f7e1ca25a5 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -1296,20 +1296,19 @@ void
TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets,
tablets_shard.tablet_map.begin(),
tablets_shard.tablet_map.end(),
[&candidates](auto& tablet_pair) {
candidates.emplace_back(tablet_pair.second); });
}
- std::for_each(
- candidates.begin(), candidates.end(),
- [&sort_ctx_vec, &skip_tablet](std::weak_ptr<Tablet>& t) {
- const TabletSharedPtr& tablet = t.lock();
- if (UNLIKELY(nullptr == tablet)) {
- return;
- }
- std::shared_lock rdlock(tablet->get_header_lock());
- int64_t cooldown_timestamp = -1;
- size_t file_size = -1;
- if (skip_tablet(tablet) &&
tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
- sort_ctx_vec.emplace_back(tablet, cooldown_timestamp,
file_size);
- }
- });
+ auto get_cooldown_tablet = [&sort_ctx_vec,
&skip_tablet](std::weak_ptr<Tablet>& t) {
+ const TabletSharedPtr& tablet = t.lock();
+ if (UNLIKELY(nullptr == tablet)) {
+ return;
+ }
+ std::shared_lock rdlock(tablet->get_header_lock());
+ int64_t cooldown_timestamp = -1;
+ size_t file_size = -1;
+ if (!skip_tablet(tablet) && tablet->need_cooldown(&cooldown_timestamp,
&file_size)) {
+ sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size);
+ }
+ };
+ std::for_each(candidates.begin(), candidates.end(), get_cooldown_tablet);
std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]