This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 14d0433a76c9015e9fe7e2048c6ddb8fac5bc0e8
Author: deardeng <[email protected]>
AuthorDate: Fri May 10 21:32:34 2024 +0800

    [improve](path gc) Execute path gc interval adaptive to disk size (#34538)
---
 be/src/olap/olap_server.cpp  | 40 +++++++++++++++++++++++++++++++++++-----
 be/src/olap/storage_engine.h |  2 ++
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 98b8ef45e4e..0a8bcedf14e 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -394,20 +394,50 @@ void StorageEngine::_unused_rowset_monitor_thread_callback() {
     } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval)));
 }
 
+int32_t StorageEngine::_auto_get_interval_by_disk_capacity(DataDir* data_dir) {
+    double disk_used = data_dir->get_usage(0);
+    double remain_used = 1 - disk_used;
+    DCHECK(remain_used >= 0 && remain_used <= 1);
+    DCHECK(config::path_gc_check_interval_second >= 0);
+    int32_t ret = 0;
+    if (remain_used > 0.9) {
+        // if config::path_gc_check_interval_second == 24h
+        ret = config::path_gc_check_interval_second;
+    } else if (remain_used > 0.7) {
+        // 12h
+        ret = config::path_gc_check_interval_second / 2;
+    } else if (remain_used > 0.5) {
+        // 6h
+        ret = config::path_gc_check_interval_second / 4;
+    } else if (remain_used > 0.3) {
+        // 4h
+        ret = config::path_gc_check_interval_second / 6;
+    } else {
+        // 3h
+        ret = config::path_gc_check_interval_second / 8;
+    }
+    return ret;
+}
+
 void StorageEngine::_path_gc_thread_callback(DataDir* data_dir) {
     LOG(INFO) << "try to start path gc thread!";
-    int32_t interval = config::path_gc_check_interval_second;
+    int32_t last_exec_time = 0;
     do {
-        LOG(INFO) << "try to perform path gc!";
-        data_dir->perform_path_gc();
+        int32_t current_time = time(nullptr);
 
-        interval = config::path_gc_check_interval_second;
+        int32_t interval = _auto_get_interval_by_disk_capacity(data_dir);
         if (interval <= 0) {
             LOG(WARNING) << "path gc thread check interval config is illegal:" << interval
                          << "will be forced set to half hour";
             interval = 1800; // 0.5 hour
         }
-    } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval)));
+        if (current_time - last_exec_time >= interval) {
+            LOG(INFO) << "try to perform path gc! disk remain [" << 1 - data_dir->get_usage(0)
+                      << "] internal [" << interval << "]";
+            data_dir->perform_path_gc();
+            last_exec_time = time(nullptr);
+        }
+    } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(5)));
 
     LOG(INFO) << "stop path gc thread!";
 }
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 4fb2041cc6a..f2b5f421670 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -337,6 +337,8 @@ private:
 
     int _get_and_set_next_disk_index(int64 partition_id, TStorageMedium::type storage_medium);
 
+    int32_t _auto_get_interval_by_disk_capacity(DataDir* data_dir);
+
 private:
     EngineOptions _options;
     std::mutex _store_lock;

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
