This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4c3a96e7df18f09db3170199056386b44596533a Author: Xinyi Zou <[email protected]> AuthorDate: Thu Feb 22 17:47:07 2024 +0800 [fix](memory) Fix LRU cache frequent prune (#31220) --- be/src/common/config.cpp | 3 ++- be/src/common/config.h | 5 +++-- be/src/olap/olap_server.cpp | 13 +++++++++++-- be/src/olap/storage_engine.h | 1 + be/src/runtime/memory/cache_manager.cpp | 18 ++++++++++++------ be/src/runtime/memory/cache_manager.h | 19 +++++++++++++++++++ be/src/runtime/memory/cache_policy.h | 2 +- be/src/runtime/memory/lru_cache_policy.h | 30 ++++++++++++++++++++++-------- 8 files changed, 71 insertions(+), 20 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 709526dbf42..cd31a8a0136 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -270,7 +270,8 @@ DEFINE_mInt64(column_dictionary_key_size_threshold, "0"); DEFINE_mInt64(memory_limitation_per_thread_for_schema_change_bytes, "2147483648"); DEFINE_mInt64(memory_limitation_per_thread_for_storage_migration_bytes, "100000000"); -DEFINE_mInt32(cache_prune_stale_interval, "10"); +DEFINE_mInt32(cache_prune_interval_sec, "10"); +DEFINE_mInt32(cache_periodic_prune_stale_sweep_sec, "300"); // the clean interval of tablet lookup cache DEFINE_mInt32(tablet_lookup_cache_stale_sweep_time_sec, "30"); DEFINE_mInt32(point_query_row_cache_stale_sweep_time_sec, "300"); diff --git a/be/src/common/config.h b/be/src/common/config.h index e0e5c52e30c..0230b2d768c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -312,8 +312,9 @@ DECLARE_mInt64(column_dictionary_key_size_threshold); DECLARE_mInt64(memory_limitation_per_thread_for_schema_change_bytes); DECLARE_mInt64(memory_limitation_per_thread_for_storage_migration_bytes); -// the prune stale interval of all cache -DECLARE_mInt32(cache_prune_stale_interval); +// all cache prune interval, used by GC and periodic thread. 
+DECLARE_mInt32(cache_prune_interval_sec); +DECLARE_mInt32(cache_periodic_prune_stale_sweep_sec); // the clean interval of tablet lookup cache DECLARE_mInt32(tablet_lookup_cache_stale_sweep_time_sec); DECLARE_mInt32(point_query_row_cache_stale_sweep_time_sec); diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 1fa99fcd31b..b190163c520 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -270,7 +270,7 @@ Status StorageEngine::start_bg_threads() { } void StorageEngine::_cache_clean_callback() { - int32_t interval = config::cache_prune_stale_interval; + int32_t interval = config::cache_periodic_prune_stale_sweep_sec; while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) { if (interval <= 0) { LOG(WARNING) << "config of cache clean interval is illegal: [" << interval @@ -281,7 +281,6 @@ void StorageEngine::_cache_clean_callback() { CacheManager::instance()->for_each_cache_prune_stale(); // Dynamically modify the config to clear the cache, each time the disable cache will only be cleared once. - // TODO, Support page cache and other caches. 
if (config::disable_segment_cache) { if (!_clear_segment_cache) { CacheManager::instance()->clear_once(CachePolicy::CacheType::SEGMENT_CACHE); @@ -290,6 +289,16 @@ void StorageEngine::_cache_clean_callback() { } else { _clear_segment_cache = false; } + if (config::disable_storage_page_cache) { + if (!_clear_page_cache) { + CacheManager::instance()->clear_once(CachePolicy::CacheType::DATA_PAGE_CACHE); + CacheManager::instance()->clear_once(CachePolicy::CacheType::INDEXPAGE_CACHE); + CacheManager::instance()->clear_once(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE); + _clear_page_cache = true; + } + } else { + _clear_page_cache = false; + } } } diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index b06e431c6e8..153c2273305 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -470,6 +470,7 @@ private: std::shared_mutex _async_publish_lock; bool _clear_segment_cache = false; + bool _clear_page_cache = false; std::atomic<bool> _need_clean_trash {false}; diff --git a/be/src/runtime/memory/cache_manager.cpp b/be/src/runtime/memory/cache_manager.cpp index 09107817086..4b242f5d3ae 100644 --- a/be/src/runtime/memory/cache_manager.cpp +++ b/be/src/runtime/memory/cache_manager.cpp @@ -26,7 +26,7 @@ int64_t CacheManager::for_each_cache_prune_stale_wrap( std::function<void(CachePolicy* cache_policy)> func, RuntimeProfile* profile) { int64_t freed_size = 0; std::lock_guard<std::mutex> l(_caches_lock); - for (auto cache_policy : _caches) { + for (auto* cache_policy : _caches) { if (!cache_policy->enable_prune()) { continue; } @@ -40,18 +40,24 @@ int64_t CacheManager::for_each_cache_prune_stale_wrap( } int64_t CacheManager::for_each_cache_prune_stale(RuntimeProfile* profile) { - return for_each_cache_prune_stale_wrap( - [](CachePolicy* cache_policy) { cache_policy->prune_stale(); }, profile); + if (need_prune(&_last_prune_stale_timestamp, "stale")) { + return for_each_cache_prune_stale_wrap( + [](CachePolicy* cache_policy) { 
cache_policy->prune_stale(); }, profile); + } + return 0; } int64_t CacheManager::for_each_cache_prune_all(RuntimeProfile* profile) { - return for_each_cache_prune_stale_wrap( - [](CachePolicy* cache_policy) { cache_policy->prune_all(false); }, profile); + if (need_prune(&_last_prune_all_timestamp, "all")) { + return for_each_cache_prune_stale_wrap( + [](CachePolicy* cache_policy) { cache_policy->prune_all(false); }, profile); + } + return 0; } void CacheManager::clear_once(CachePolicy::CacheType type) { std::lock_guard<std::mutex> l(_caches_lock); - for (auto cache_policy : _caches) { + for (auto* cache_policy : _caches) { if (cache_policy->type() == type) { cache_policy->prune_all(true); // will print log } diff --git a/be/src/runtime/memory/cache_manager.h b/be/src/runtime/memory/cache_manager.h index 8fdce10d699..d17e8eff986 100644 --- a/be/src/runtime/memory/cache_manager.h +++ b/be/src/runtime/memory/cache_manager.h @@ -17,9 +17,12 @@ #pragma once +#include <string> + #include "runtime/exec_env.h" #include "runtime/memory/cache_policy.h" #include "util/runtime_profile.h" +#include "util/time.h" namespace doris { @@ -54,9 +57,25 @@ public: void clear_once(CachePolicy::CacheType type); + bool need_prune(int64_t* last_timestamp, const std::string& type) { + int64_t now = UnixSeconds(); + std::lock_guard<std::mutex> l(_caches_lock); + if (now - *last_timestamp > config::cache_prune_interval_sec) { + *last_timestamp = now; + return true; + } + LOG(INFO) << fmt::format( + "[MemoryGC] cache no prune {}, last prune less than interval {}, now {}, last " + "timestamp {}", + type, config::cache_prune_interval_sec, now, *last_timestamp); + return false; + } + private: std::mutex _caches_lock; std::list<CachePolicy*> _caches; + int64_t _last_prune_stale_timestamp = 0; + int64_t _last_prune_all_timestamp = 0; }; } // namespace doris diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index 9a9f2c36e84..f4d5bb1bb80 100644 --- 
a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -90,7 +90,7 @@ public: virtual ~CachePolicy(); virtual void prune_stale() = 0; - virtual void prune_all(bool clear) = 0; + virtual void prune_all(bool force) = 0; CacheType type() { return _type; } bool enable_prune() const { return _enable_prune; } diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h index bfd1a2568a3..48b56e8b3b1 100644 --- a/be/src/runtime/memory/lru_cache_policy.h +++ b/be/src/runtime/memory/lru_cache_policy.h @@ -90,36 +90,50 @@ public: curtime); }; + LOG(INFO) << fmt::format("[MemoryGC] {} prune stale start, consumption {}", + type_string(_type), _cache->mem_consumption()); // Prune cache in lazy mode to save cpu and minimize the time holding write lock PrunedInfo pruned_info = _cache->prune_if(pred, true); COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); COUNTER_UPDATE(_prune_stale_number_counter, 1); - LOG(INFO) << fmt::format("{} prune stale {} entries, {} bytes, {} times prune", - type_string(_type), _freed_entrys_counter->value(), - _freed_memory_counter->value(), - _prune_stale_number_counter->value()); + LOG(INFO) << fmt::format( + "[MemoryGC] {} prune stale {} entries, {} bytes, {} times prune", + type_string(_type), _freed_entrys_counter->value(), + _freed_memory_counter->value(), _prune_stale_number_counter->value()); + } else { + LOG(INFO) << fmt::format( + "[MemoryGC] {} not need prune stale, consumption {} less than " + "CACHE_MIN_FREE_SIZE {}", + type_string(_type), _cache->mem_consumption(), CACHE_MIN_FREE_SIZE); } } - void prune_all(bool clear) override { + void prune_all(bool force) override { COUNTER_SET(_freed_entrys_counter, (int64_t)0); COUNTER_SET(_freed_memory_counter, (int64_t)0); if (_cache == ExecEnv::GetInstance()->get_dummy_lru_cache()) { return; } - if ((clear && _cache->mem_consumption() != 0) || + if ((force && 
_cache->mem_consumption() != 0) || _cache->mem_consumption() > CACHE_MIN_FREE_SIZE) { COUNTER_SET(_cost_timer, (int64_t)0); SCOPED_TIMER(_cost_timer); + LOG(INFO) << fmt::format("[MemoryGC] {} prune all start, consumption {}", + type_string(_type), _cache->mem_consumption()); PrunedInfo pruned_info = _cache->prune(); COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); COUNTER_UPDATE(_prune_all_number_counter, 1); LOG(INFO) << fmt::format( - "{} prune all {} entries, {} bytes, {} times prune, is clear: {}", + "[MemoryGC] {} prune all {} entries, {} bytes, {} times prune, is force: {}", type_string(_type), _freed_entrys_counter->value(), - _freed_memory_counter->value(), _prune_stale_number_counter->value(), clear); + _freed_memory_counter->value(), _prune_all_number_counter->value(), force); + } else { + LOG(INFO) << fmt::format( + "[MemoryGC] {} not need prune all, force is {}, consumption {}, " + "CACHE_MIN_FREE_SIZE {}", + type_string(_type), force, _cache->mem_consumption(), CACHE_MIN_FREE_SIZE); } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
