This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6eb39b9c749a2332def4f0a0fecff0c08b21a332 Author: AlexYue <[email protected]> AuthorDate: Thu Aug 22 17:13:59 2024 +0800 [enhance](Cooldown) Add metric to trace cooldown task and unused remote files caused by failed upload and cold compaction (#39293) Users can now track the progress of cooldown tasks through bvar, along with the unused remote files generated by failed compactions and uploads. --- be/src/olap/data_dir.cpp | 1 + be/src/olap/tablet.cpp | 14 ++++++++++++-- be/src/olap/tablet.h | 7 +++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 640c1aa4f81..4070bd1dd43 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -1006,6 +1006,7 @@ void DataDir::perform_remote_rowset_gc() { auto st = fs->batch_delete(seg_paths); if (st.ok()) { deleted_keys.push_back(std::move(key)); + unused_remote_rowset_num << -1; } else { LOG(WARNING) << "failed to delete remote rowset. err=" << st; } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 1efb9afbf06..48e1efb4e6b 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -148,6 +148,8 @@ namespace { bvar::Adder<uint64_t> exceed_version_limit_counter; bvar::Window<bvar::Adder<uint64_t>> exceed_version_limit_counter_minute( &exceed_version_limit_counter, 60); +bvar::Adder<uint64_t> cooldown_pending_task("cooldown_pending_task"); +bvar::Adder<uint64_t> cooldown_processing_task("cooldown_processing_task"); void set_last_failure_time(Tablet* tablet, const Compaction& compaction, int64_t ms) { switch (compaction.compaction_type()) { @@ -168,6 +170,8 @@ void set_last_failure_time(Tablet* tablet, const Compaction& compaction, int64_t } // namespace +bvar::Adder<uint64_t> unused_remote_rowset_num("unused_remote_rowset_num"); + WriteCooldownMetaExecutors::WriteCooldownMetaExecutors(size_t executor_nums) : _executor_nums(executor_nums) { for (size_t i = 0; i < _executor_nums; i++) { @@ -230,8 +234,13 @@ void
WriteCooldownMetaExecutors::WriteCooldownMetaExecutors::submit(TabletShared VLOG_DEBUG << "tablet " << t->tablet_id() << " is not cooldown replica"; }; - _executors[_get_executor_pos(tablet_id)]->offer( - [task = std::move(async_write_task)]() { task(); }); + cooldown_pending_task << 1; + _executors[_get_executor_pos(tablet_id)]->offer([task = std::move(async_write_task)]() { + cooldown_pending_task << -1; + cooldown_processing_task << 1; + task(); + cooldown_processing_task << -1; + }); } Tablet::Tablet(StorageEngine& engine, TabletMetaSharedPtr tablet_meta, DataDir* data_dir, @@ -2387,6 +2396,7 @@ void Tablet::record_unused_remote_rowset(const RowsetId& rowset_id, const std::s LOG(WARNING) << "failed to record unused remote rowset. tablet_id=" << tablet_id() << " rowset_id=" << rowset_id << " resource_id=" << resource; } + unused_remote_rowset_num << 1; } Status Tablet::remove_all_remote_rowsets() { diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index fa11c2d8685..4cd2a355586 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -51,6 +51,11 @@ #include "util/once.h" #include "util/slice.h" +namespace bvar { +template <typename T> +class Adder; +} + namespace doris { class Tablet; @@ -78,6 +83,8 @@ enum SortType : int; enum TabletStorageType { STORAGE_TYPE_LOCAL, STORAGE_TYPE_REMOTE, STORAGE_TYPE_REMOTE_AND_LOCAL }; +extern bvar::Adder<uint64_t> unused_remote_rowset_num; + static inline constexpr auto TRACE_TABLET_LOCK_THRESHOLD = std::chrono::seconds(1); struct WriteCooldownMetaExecutors { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
