This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6cbefd5 [LRUCache] Expose LRU Cache status to metrics (#4688)
6cbefd5 is described below
commit 6cbefd56211780b5c2a6284118f48b5d8ba2181c
Author: Yingchun Lai <[email protected]>
AuthorDate: Thu Oct 22 21:37:02 2020 +0800
[LRUCache] Expose LRU Cache status to metrics (#4688)
Exposing the LRU cache status as metrics is helpful for diagnosing
problems such as high cache usage or a low hit rate.
---
be/src/olap/file_helper.cpp | 2 +-
be/src/olap/fs/file_block_manager.cpp | 4 +-
be/src/olap/lru_cache.cpp | 121 ++++++++++++++--------------------
be/src/olap/lru_cache.h | 46 ++++++-------
be/src/olap/page_cache.cpp | 2 +-
be/src/olap/storage_engine.cpp | 8 +--
be/src/olap/storage_engine.h | 4 --
be/src/runtime/load_channel_mgr.cpp | 2 +-
be/src/util/file_cache.cpp | 2 +-
be/src/util/metrics.h | 3 +
be/test/olap/lru_cache_test.cpp | 2 +-
be/test/util/file_cache_test.cpp | 2 +-
12 files changed, 88 insertions(+), 110 deletions(-)
diff --git a/be/src/olap/file_helper.cpp b/be/src/olap/file_helper.cpp
index 8c3d68f..75e1c5b 100644
--- a/be/src/olap/file_helper.cpp
+++ b/be/src/olap/file_helper.cpp
@@ -47,7 +47,7 @@ FileHandler::FileHandler() :
static std::once_flag once_flag;
#ifdef BE_TEST
std::call_once(once_flag, [] {
- _s_fd_cache =
new_lru_cache(config::file_descriptor_cache_capacity);
+ _s_fd_cache = new_lru_cache("FileHandlerCacheTest",
config::file_descriptor_cache_capacity);
});
#else
// storage engine may not be opened when doris try to read and write
diff --git a/be/src/olap/fs/file_block_manager.cpp
b/be/src/olap/fs/file_block_manager.cpp
index 291dc0e..c493885 100644
--- a/be/src/olap/fs/file_block_manager.cpp
+++ b/be/src/olap/fs/file_block_manager.cpp
@@ -387,9 +387,9 @@ FileBlockManager::FileBlockManager(Env* env,
BlockManagerOptions opts) :
}
#ifdef BE_TEST
- _file_cache.reset(new FileCache<RandomAccessFile>("Readable file
cache", config::file_descriptor_cache_capacity));
+ _file_cache.reset(new
FileCache<RandomAccessFile>("Readable_file_cache",
config::file_descriptor_cache_capacity));
#else
- _file_cache.reset(new FileCache<RandomAccessFile>("Readable file
cache", StorageEngine::instance()->file_cache()));
+ _file_cache.reset(new
FileCache<RandomAccessFile>("Readable_file_cache",
StorageEngine::instance()->file_cache()));
#endif
}
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index df6df40..dd68b13 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -17,12 +17,20 @@
#include "olap/olap_index.h"
#include "olap/row_block.h"
#include "olap/utils.h"
+#include "util/doris_metrics.h"
using std::string;
using std::stringstream;
namespace doris {
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(capacity, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(usage, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(usage_ratio, MetricUnit::NOUNIT);
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(lookup_count, MetricUnit::OPERATIONS);
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(hit_count, MetricUnit::OPERATIONS);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(hit_ratio, MetricUnit::NOUNIT);
+
uint32_t CacheKey::hash(const char* data, size_t n, uint32_t seed) const {
// Similar to murmur hash
const uint32_t m = 0xc6a4a793;
@@ -82,9 +90,7 @@ LRUHandle* HandleTable::insert(LRUHandle* h) {
if (_elems > _length) {
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
- if (!_resize()) {
- return NULL;
- }
+ _resize();
}
}
@@ -114,7 +120,7 @@ LRUHandle** HandleTable::_find_pointer(const CacheKey& key,
uint32_t hash) {
return ptr;
}
-bool HandleTable::_resize() {
+void HandleTable::_resize() {
uint32_t new_length = 4;
while (new_length < _elems) {
@@ -122,21 +128,13 @@ bool HandleTable::_resize() {
}
LRUHandle** new_list = new(std::nothrow) LRUHandle*[new_length];
-
- if (NULL == new_list) {
- LOG(FATAL) << "failed to malloc new hash list. new_length=" <<
new_length;
- return false;
- }
-
memset(new_list, 0, sizeof(new_list[0]) * new_length);
uint32_t count = 0;
for (uint32_t i = 0; i < _length; i++) {
LRUHandle* h = _list[i];
-
while (h != NULL) {
LRUHandle* next = h->next_hash;
- CacheKey key = h->key();
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash & (new_length - 1)];
h->next_hash = *ptr;
@@ -146,20 +144,13 @@ bool HandleTable::_resize() {
}
}
- if (_elems != count) {
- delete [] new_list;
- LOG(FATAL) << "_elems not match new count. elems=" << _elems
- << ", count=" << count;
- return false;
- }
-
+ DCHECK_EQ(_elems, count);
delete [] _list;
_list = new_list;
_length = new_length;
- return true;
}
-LRUCache::LRUCache() : _usage(0), _last_id(0), _lookup_count(0),
+LRUCache::LRUCache() : _usage(0), _lookup_count(0),
_hit_count(0) {
// Make empty circular linked list
_lru.next = &_lru;
@@ -376,15 +367,29 @@ uint32_t ShardedLRUCache::_shard(uint32_t hash) {
return hash >> (32 - kNumShardBits);
}
-ShardedLRUCache::ShardedLRUCache(size_t capacity)
- : _last_id(0) {
- const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
-
- for (int s = 0; s < kNumShards; s++) {
- _shards[s].set_capacity(per_shard);
- }
+ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t
total_capacity)
+ : _name(name), _last_id(1) {
+ const size_t per_shard = (total_capacity + (kNumShards - 1)) / kNumShards;
+ for (int s = 0; s < kNumShards; s++) {
+ _shards[s].set_capacity(per_shard);
}
+ _entity = DorisMetrics::instance()->metric_registry()
+ ->register_entity(std::string("lru_cache:") + name, {{"name", name}});
+ _entity->register_hook(name,
std::bind(&ShardedLRUCache::update_cache_metrics, this));
+ INT_GAUGE_METRIC_REGISTER(_entity, capacity);
+ INT_GAUGE_METRIC_REGISTER(_entity, usage);
+ INT_DOUBLE_METRIC_REGISTER(_entity, usage_ratio);
+ INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, lookup_count);
+ INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, hit_count);
+ INT_DOUBLE_METRIC_REGISTER(_entity, hit_ratio);
+}
+
+ShardedLRUCache::~ShardedLRUCache() {
+ _entity->deregister_hook(_name);
+ DorisMetrics::instance()->metric_registry()->deregister_entity(_entity);
+}
+
Cache::Handle* ShardedLRUCache::insert(
const CacheKey& key,
void* value,
@@ -420,8 +425,7 @@ Slice ShardedLRUCache::value_slice(Handle* handle) {
}
uint64_t ShardedLRUCache::new_id() {
- MutexLock l(&_id_mutex);
- return ++(_last_id);
+ return _last_id.fetch_add(1, std::memory_order_relaxed);
}
void ShardedLRUCache::prune() {
@@ -432,51 +436,28 @@ void ShardedLRUCache::prune() {
VLOG(7) << "Successfully prune cache, clean " << num_prune << " entries.";
}
-size_t ShardedLRUCache::get_memory_usage() {
+void ShardedLRUCache::update_cache_metrics() const {
+ size_t total_capacity = 0;
size_t total_usage = 0;
- for (int s = 0; s < kNumShards; s++) {
- total_usage += _shards[s].get_usage();
- }
- return total_usage;
-}
-
-void ShardedLRUCache::get_cache_status(rapidjson::Document* document) {
- size_t shard_count = sizeof(_shards) / sizeof(LRUCache);
-
- for (uint32_t i = 0; i < shard_count; ++i) {
- size_t capacity = _shards[i].get_capacity();
- size_t usage = _shards[i].get_usage();
- rapidjson::Value shard_info(rapidjson::kObjectType);
- shard_info.AddMember("capacity", static_cast<double>(capacity),
document->GetAllocator());
- shard_info.AddMember("usage", static_cast<double>(usage),
document->GetAllocator());
-
- float usage_ratio = 0.0f;
-
- if (0 != capacity) {
- usage_ratio = static_cast<float>(usage) /
static_cast<float>(capacity);
- }
-
- shard_info.AddMember("usage_ratio", usage_ratio,
document->GetAllocator());
-
- size_t lookup_count = _shards[i].get_lookup_count();
- size_t hit_count = _shards[i].get_hit_count();
- shard_info.AddMember("lookup_count",
static_cast<double>(lookup_count), document->GetAllocator());
- shard_info.AddMember("hit_count", static_cast<double>(hit_count),
document->GetAllocator());
-
- float hit_ratio = 0.0f;
-
- if (0 != lookup_count) {
- hit_ratio = static_cast<float>(hit_count) /
static_cast<float>(lookup_count);
- }
-
- shard_info.AddMember("hit_ratio", hit_ratio, document->GetAllocator());
- document->PushBack(shard_info, document->GetAllocator());
+ size_t total_lookup_count = 0;
+ size_t total_hit_count = 0;
+ for (int i = 0; i < kNumShards; i++) {
+ total_capacity += _shards[i].get_capacity();
+ total_usage += _shards[i].get_usage();
+ total_lookup_count += _shards[i].get_lookup_count();
+ total_hit_count += _shards[i].get_hit_count();
}
+ capacity->set_value(total_capacity);
+ usage->set_value(total_usage);
+ lookup_count->set_value(total_lookup_count);
+ hit_count->set_value(total_hit_count);
+ usage_ratio->set_value(total_capacity == 0 ? 0 : (total_usage /
total_capacity));
+ hit_ratio->set_value(total_lookup_count == 0 ? 0 : (total_hit_count /
total_lookup_count));
}
-Cache* new_lru_cache(size_t capacity) {
- return new ShardedLRUCache(capacity);
+Cache* new_lru_cache(const std::string& name, size_t capacity) {
+ return new ShardedLRUCache(name, capacity);
}
} // namespace doris
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index ef3d6bb..b57c82b 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -14,6 +14,7 @@
#include <rapidjson/document.h>
#include "olap/olap_common.h"
+#include "util/metrics.h"
#include "util/mutex.h"
#include "util/slice.h"
@@ -46,9 +47,9 @@ namespace doris {
class Cache;
class CacheKey;
- // Create a new cache with a fixed size capacity. This implementation
+ // Create a new cache with a specified name and a fixed size capacity.
This implementation
// of Cache uses a least-recently-used eviction policy.
- extern Cache* new_lru_cache(size_t capacity);
+ extern Cache* new_lru_cache(const std::string& name, size_t capacity);
class CacheKey {
public:
@@ -221,11 +222,6 @@ namespace doris {
// leveldb may change prune() to a pure abstract method.
virtual void prune() {}
- // 获取运行统计项,包括内存占用
- virtual size_t get_memory_usage() = 0;
- // cache命中率统计
- virtual void get_cache_status(rapidjson::Document* document) = 0;
-
private:
DISALLOW_COPY_AND_ASSIGN(Cache);
};
@@ -235,9 +231,9 @@ namespace doris {
typedef struct LRUHandle {
void* value;
void (*deleter)(const CacheKey&, void* value);
- LRUHandle* next_hash;
- LRUHandle* next;
- LRUHandle* prev;
+ LRUHandle* next_hash; // next entry in hash table
+ LRUHandle* next; // next entry in lru list
+ LRUHandle* prev; // previous entry in lru list
size_t charge;
size_t key_length;
bool in_cache; // Whether entry is in the cache.
@@ -296,7 +292,7 @@ namespace doris {
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
- bool _resize();
+ void _resize();
};
// A single shard of sharded cache.
@@ -323,16 +319,16 @@ namespace doris {
void erase(const CacheKey& key, uint32_t hash);
int prune();
- uint64_t get_lookup_count() {
+ uint64_t get_lookup_count() const {
return _lookup_count;
}
- uint64_t get_hit_count() {
+ uint64_t get_hit_count() const {
return _hit_count;
}
- size_t get_usage() {
+ size_t get_usage() const {
return _usage;
}
- size_t get_capacity() {
+ size_t get_capacity() const {
return _capacity;
}
@@ -349,7 +345,6 @@ namespace doris {
// _mutex protects the following state.
Mutex _mutex;
size_t _usage;
- uint64_t _last_id;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
@@ -367,9 +362,9 @@ namespace doris {
class ShardedLRUCache : public Cache {
public:
- explicit ShardedLRUCache(size_t capacity);
+ explicit ShardedLRUCache(const std::string& name, size_t
total_capacity);
// TODO(fdy): 析构时清除所有cache元素
- virtual ~ShardedLRUCache() {}
+ virtual ~ShardedLRUCache();
virtual Handle* insert(
const CacheKey& key,
void* value,
@@ -383,16 +378,23 @@ namespace doris {
Slice value_slice(Handle* handle) override;
virtual uint64_t new_id();
virtual void prune();
- virtual size_t get_memory_usage();
- virtual void get_cache_status(rapidjson::Document* document);
+ void update_cache_metrics() const;
private:
static inline uint32_t _hash_slice(const CacheKey& s);
static uint32_t _shard(uint32_t hash);
+ std::string _name;
LRUCache _shards[kNumShards];
- Mutex _id_mutex;
- uint64_t _last_id;
+ std::atomic<uint64_t> _last_id;
+
+ std::shared_ptr<MetricEntity> _entity = nullptr;
+ IntGauge* capacity = nullptr;
+ IntGauge* usage = nullptr;
+ DoubleGauge* usage_ratio = nullptr;
+ IntAtomicCounter* lookup_count = nullptr;
+ IntAtomicCounter* hit_count = nullptr;
+ DoubleGauge* hit_ratio = nullptr;
};
} // namespace doris
diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp
index f92868b..4643ef8 100644
--- a/be/src/olap/page_cache.cpp
+++ b/be/src/olap/page_cache.cpp
@@ -27,7 +27,7 @@ void StoragePageCache::create_global_cache(size_t capacity) {
_s_instance = &instance;
}
-StoragePageCache::StoragePageCache(size_t capacity) :
_cache(new_lru_cache(capacity)) {
+StoragePageCache::StoragePageCache(size_t capacity) :
_cache(new_lru_cache("StoragePageCache", capacity)) {
}
bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle) {
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index adbf9a5..1650567 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -170,9 +170,9 @@ Status StorageEngine::_open() {
RETURN_NOT_OK_STATUS_WITH_WARN(_check_file_descriptor_number(), "check fd
number failed");
- _index_stream_lru_cache =
new_lru_cache(config::index_stream_cache_capacity);
+ _index_stream_lru_cache = new_lru_cache("SegmentIndexCache",
config::index_stream_cache_capacity);
- _file_cache.reset(new_lru_cache(config::file_descriptor_cache_capacity));
+ _file_cache.reset(new_lru_cache("FileHandlerCache",
config::file_descriptor_cache_capacity));
auto dirs = get_stores<false>();
load_data_dirs(dirs);
@@ -633,10 +633,6 @@ void
StorageEngine::_perform_base_compaction(TabletSharedPtr best_tablet) {
best_tablet->set_last_base_compaction_failure_time(0);
}
-void StorageEngine::get_cache_status(rapidjson::Document* document) const {
- return _index_stream_lru_cache->get_cache_status(document);
-}
-
OLAPStatus StorageEngine::_start_trash_sweep(double* usage) {
OLAPStatus res = OLAP_SUCCESS;
LOG(INFO) << "start trash and snapshot sweep.";
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 7321358..bf9650c 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -84,9 +84,6 @@ public:
void clear_transaction_task(const TTransactionId transaction_id,
const std::vector<TPartitionId>&
partition_ids);
- // 获取cache的使用情况信息
- void get_cache_status(rapidjson::Document* document) const;
-
// Note: 这里只能reload原先已经存在的root path,即re-load启动时就登记的root path
// 是允许的,但re-load全新的path是不允许的,因为此处没有彻底更新ce调度器信息
void load_data_dirs(const std::vector<DataDir*>& stores);
@@ -283,7 +280,6 @@ private:
int32_t _effective_cluster_id;
bool _is_all_cluster_id_exist;
- Cache* _file_descriptor_lru_cache;
Cache* _index_stream_lru_cache;
// _file_cache is a lru_cache for file descriptors of files opened by
doris,
diff --git a/be/src/runtime/load_channel_mgr.cpp
b/be/src/runtime/load_channel_mgr.cpp
index 0fb5692..d48f46f 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -68,7 +68,7 @@ LoadChannelMgr::LoadChannelMgr() :
_stop_background_threads_latch(1) {
std::lock_guard<std::mutex> l(_lock);
return _load_channels.size();
});
- _last_success_channel = new_lru_cache(1024);
+ _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024);
}
LoadChannelMgr::~LoadChannelMgr() {
diff --git a/be/src/util/file_cache.cpp b/be/src/util/file_cache.cpp
index bcad623..0bdfb13 100644
--- a/be/src/util/file_cache.cpp
+++ b/be/src/util/file_cache.cpp
@@ -25,7 +25,7 @@ namespace doris {
template <class FileType>
FileCache<FileType>::FileCache(const std::string& cache_name, int
max_open_files) :
_cache_name(cache_name),
- _cache(new_lru_cache(max_open_files)),
+ _cache(new_lru_cache(std::string("FileBlockManagerCache:") +
cache_name, max_open_files)),
_is_cache_own(true) { }
template <class FileType>
diff --git a/be/src/util/metrics.h b/be/src/util/metrics.h
index cc85ffe..cb69012 100644
--- a/be/src/util/metrics.h
+++ b/be/src/util/metrics.h
@@ -277,6 +277,9 @@ public:
#define INT_GAUGE_METRIC_REGISTER(entity, metric)
\
metric = (IntGauge*)(entity->register_metric<IntGauge>(&METRIC_##metric))
+#define INT_DOUBLE_METRIC_REGISTER(entity, metric)
\
+ metric =
(DoubleGauge*)(entity->register_metric<DoubleGauge>(&METRIC_##metric))
+
#define INT_UGAUGE_METRIC_REGISTER(entity, metric)
\
metric = (UIntGauge*)(entity->register_metric<UIntGauge>(&METRIC_##metric))
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index ca5c7b1..537b696 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -71,7 +71,7 @@ public:
std::vector<int> _deleted_values;
Cache* _cache;
- CacheTest() : _cache(new_lru_cache(kCacheSize)) {
+ CacheTest() : _cache(new_lru_cache("test", kCacheSize)) {
_s_current = this;
}
diff --git a/be/test/util/file_cache_test.cpp b/be/test/util/file_cache_test.cpp
index efb4aa6..33ac37f 100644
--- a/be/test/util/file_cache_test.cpp
+++ b/be/test/util/file_cache_test.cpp
@@ -28,7 +28,7 @@ public:
FileCacheTest() { }
void SetUp() override {
- _file_cache.reset(new FileCache<RandomAccessFile>("test cache",
10000));
+ _file_cache.reset(new FileCache<RandomAccessFile>("test_cache",
10000));
_file_exist = "file_exist";
std::unique_ptr<WritableFile> file;
auto st = Env::Default()->new_writable_file(_file_exist, &file);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]