This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new f40868a [Optimize] Improve LRU cache's performance (#4781)
f40868a is described below
commit f40868a4805f0ba503bcca9a9c04f704b61c121b
Author: Yingchun Lai <[email protected]>
AuthorDate: Fri Nov 6 10:56:27 2020 +0800
[Optimize] Improve LRU cache's performance (#4781)
When LRUCache insert and evict a large number of entries, there are
frequently calls of HandleTable::remove(e->key, e->hash), it will
lookup the entry in the hash table. Now that we know the entry to
remove 'e', we can remove it directly from hash table's collision list
if it's a double linked list.
This patch refactor the collision list to double linked list, the simple
benchmark CacheTest.SimpleBenchmark shows that time cost reduced about
18% in my test environment.
---
be/src/olap/lru_cache.cpp | 121 ++++++++++++++++++++++++++--------------
be/src/olap/lru_cache.h | 35 ++++++++----
be/test/olap/lru_cache_test.cpp | 113 +++++++++++++++++++++++++++++++++++++
3 files changed, 216 insertions(+), 53 deletions(-)
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index dd68b13..1a93a21 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -73,20 +73,26 @@ uint32_t CacheKey::hash(const char* data, size_t n,
uint32_t seed) const {
Cache::~Cache() {
}
+HandleTable::~HandleTable() {
+ for (uint32_t i = 0; i < _length; i++) {
+ delete _list[i];
+ }
+ delete[] _list;
+}
+
// LRU cache implementation
LRUHandle* HandleTable::lookup(const CacheKey& key, uint32_t hash) {
return *_find_pointer(key, hash);
}
LRUHandle* HandleTable::insert(LRUHandle* h) {
- LRUHandle** ptr = _find_pointer(h->key(), h->hash);
- LRUHandle* old = *ptr;
- h->next_hash = (old == NULL ? NULL : old->next_hash);
- *ptr = h;
+ LRUHandle* old = remove(h->key(), h->hash);
+ LRUHandle* head = _list[h->hash & (_length - 1)];
- if (old == NULL) {
- ++_elems;
+ _head_insert(head, h);
+ ++_elems;
+ if (old == NULL) {
if (_elems > _length) {
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
@@ -101,17 +107,24 @@ LRUHandle* HandleTable::remove(const CacheKey& key,
uint32_t hash) {
LRUHandle** ptr = _find_pointer(key, hash);
LRUHandle* result = *ptr;
- if (result != NULL) {
- *ptr = result->next_hash;
- --_elems;
- }
+ remove(result);
return result;
}
-LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) {
- LRUHandle** ptr = &_list[hash & (_length - 1)];
+void HandleTable::remove(const LRUHandle* h) {
+ if (h != nullptr) {
+ if (h->next_hash != nullptr) {
+ h->next_hash->prev_hash = h->prev_hash;
+ }
+ DCHECK(h->prev_hash != nullptr);
+ h->prev_hash->next_hash = h->next_hash;
+ --_elems;
+ }
+}
+LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) {
+ LRUHandle** ptr = &(_list[hash & (_length - 1)]->next_hash);
while (*ptr != NULL &&
((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
@@ -120,42 +133,56 @@ LRUHandle** HandleTable::_find_pointer(const CacheKey&
key, uint32_t hash) {
return ptr;
}
-void HandleTable::_resize() {
- uint32_t new_length = 4;
+void HandleTable::_head_insert(LRUHandle* head, LRUHandle* handle) {
+ handle->next_hash = head->next_hash;
+ if (handle->next_hash != nullptr) {
+ handle->next_hash->prev_hash = handle;
+ }
+ handle->prev_hash = head;
+ head->next_hash = handle;
+}
- while (new_length < _elems) {
+void HandleTable::_resize() {
+ uint32_t new_length = 16;
+ while (new_length < _elems * 1.5) {
new_length *= 2;
}
LRUHandle** new_list = new(std::nothrow) LRUHandle*[new_length];
memset(new_list, 0, sizeof(new_list[0]) * new_length);
- uint32_t count = 0;
+ for (uint32_t i = 0; i < new_length; i++) {
+ // The first node in the linked-list is a dummy node used for
+ // inserting new node mainly.
+ new_list[i] = new LRUHandle();
+ }
+ uint32_t count = 0;
for (uint32_t i = 0; i < _length; i++) {
- LRUHandle* h = _list[i];
+ LRUHandle* h = _list[i]->next_hash;
while (h != NULL) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
- LRUHandle** ptr = &new_list[hash & (new_length - 1)];
- h->next_hash = *ptr;
- *ptr = h;
+ LRUHandle* head = new_list[hash & (new_length - 1)];
+ _head_insert(head, h);
h = next;
count++;
}
}
DCHECK_EQ(_elems, count);
+ for (uint32_t i = 0; i < _length; i++) {
+ delete _list[i];
+ }
delete [] _list;
_list = new_list;
_length = new_length;
}
-LRUCache::LRUCache() : _usage(0), _lookup_count(0),
- _hit_count(0) {
- // Make empty circular linked list
- _lru.next = &_lru;
- _lru.prev = &_lru;
- }
+LRUCache::LRUCache() {
+ // Make empty circular linked list
+ _lru.next = &_lru;
+ _lru.prev = &_lru;
+}
LRUCache::~LRUCache() {
prune();
@@ -213,7 +240,7 @@ void LRUCache::release(Cache::Handle* handle) {
// only exists in cache
if (_usage > _capacity) {
// take this opportunity and remove the item
- _table.remove(e->key(), e->hash);
+ _table.remove(e);
e->in_cache = false;
_unref(e);
_usage -= e->charge;
@@ -231,7 +258,7 @@ void LRUCache::release(Cache::Handle* handle) {
}
}
-void LRUCache::_evict_from_lru(size_t charge, std::vector<LRUHandle*>*
deleted) {
+void LRUCache::_evict_from_lru(size_t charge, LRUHandle** to_remove_head) {
LRUHandle* cur = &_lru;
// 1. evict normal cache entries
while (_usage + charge > _capacity && cur->next != &_lru) {
@@ -241,14 +268,16 @@ void LRUCache::_evict_from_lru(size_t charge,
std::vector<LRUHandle*>* deleted)
continue;
}
_evict_one_entry(old);
- deleted->push_back(old);
+ old->next = *to_remove_head;
+ *to_remove_head = old;
}
// 2. evict durable cache entries if need
while (_usage + charge > _capacity && _lru.next != &_lru) {
LRUHandle* old = _lru.next;
DCHECK(old->priority == CachePriority::DURABLE);
_evict_one_entry(old);
- deleted->push_back(old);
+ old->next = *to_remove_head;
+ *to_remove_head = old;
}
}
@@ -256,7 +285,7 @@ void LRUCache::_evict_one_entry(LRUHandle* e) {
DCHECK(e->in_cache);
DCHECK(e->refs == 1); // LRU list contains elements which may be evicted
_lru_remove(e);
- _table.remove(e->key(), e->hash);
+ _table.remove(e);
e->in_cache = false;
_unref(e);
_usage -= e->charge;
@@ -279,13 +308,13 @@ Cache::Handle* LRUCache::insert(
e->in_cache = true;
e->priority = priority;
memcpy(e->key_data, key.data(), key.size());
- std::vector<LRUHandle*> last_ref_list;
+ LRUHandle* to_remove_head = nullptr;
{
MutexLock l(&_mutex);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty
- _evict_from_lru(charge, &last_ref_list);
+ _evict_from_lru(charge, &to_remove_head);
// insert into the cache
// note that the cache might get larger than its capacity if not enough
@@ -299,15 +328,18 @@ Cache::Handle* LRUCache::insert(
// old is on LRU because it's in cache and its reference count
// was just 1 (Unref returned 0)
_lru_remove(old);
- last_ref_list.push_back(old);
+ old->next = to_remove_head;
+ to_remove_head = old;
}
}
}
// we free the entries here outside of mutex for
// performance reasons
- for (auto entry : last_ref_list) {
- entry->free();
+ while (to_remove_head != nullptr) {
+ LRUHandle* next = to_remove_head->next;
+ to_remove_head->free();
+ to_remove_head = next;
}
return reinterpret_cast<Cache::Handle*>(e);
@@ -338,7 +370,7 @@ void LRUCache::erase(const CacheKey& key, uint32_t hash) {
}
int LRUCache::prune() {
- std::vector<LRUHandle*> last_ref_list;
+ LRUHandle* to_remove_head = nullptr;
{
MutexLock l(&_mutex);
while (_lru.next != &_lru) {
@@ -346,17 +378,22 @@ int LRUCache::prune() {
DCHECK(old->in_cache);
DCHECK(old->refs == 1); // LRU list contains elements which may
be evicted
_lru_remove(old);
- _table.remove(old->key(), old->hash);
+ _table.remove(old);
old->in_cache = false;
_unref(old);
_usage -= old->charge;
- last_ref_list.push_back(old);
+ old->next = to_remove_head;
+ to_remove_head = old;
}
}
- for (auto entry : last_ref_list) {
- entry->free();
+ int pruned_count = 0;
+ while (to_remove_head != nullptr) {
+ ++pruned_count;
+ LRUHandle* next = to_remove_head->next;
+ to_remove_head->free();
+ to_remove_head = next;
}
- return last_ref_list.size();
+ return pruned_count;
}
inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) {
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index b57c82b..3e483a1 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -11,6 +11,7 @@
#include <string>
#include <vector>
+#include <gtest/gtest_prod.h>
#include <rapidjson/document.h>
#include "olap/olap_common.h"
@@ -231,9 +232,10 @@ namespace doris {
typedef struct LRUHandle {
void* value;
void (*deleter)(const CacheKey&, void* value);
- LRUHandle* next_hash; // next entry in hash table
- LRUHandle* next; // next entry in lru list
- LRUHandle* prev; // previous entry in lru list
+ LRUHandle* next_hash = nullptr; // next entry in hash table
+ LRUHandle* prev_hash = nullptr; // previous entry in hash table
+ LRUHandle* next = nullptr; // next entry in lru list
+ LRUHandle* prev = nullptr; // previous entry in lru list
size_t charge;
size_t key_length;
bool in_cache; // Whether entry is in the cache.
@@ -271,17 +273,22 @@ namespace doris {
_resize();
}
- ~HandleTable() {
- delete[] _list;
- }
+ ~HandleTable();
LRUHandle* lookup(const CacheKey& key, uint32_t hash);
LRUHandle* insert(LRUHandle* h);
+ // Remove element from hash table by "key" and "hash".
LRUHandle* remove(const CacheKey& key, uint32_t hash);
+ // Remove element from hash table by "h", it would be faster
+ // than the function above.
+ void remove(const LRUHandle* h);
+
private:
+ FRIEND_TEST(CacheTest, HandleTableTest);
+
// The tablet consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
uint32_t _length;
@@ -292,6 +299,10 @@ namespace doris {
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
+
+ // Insert "handle" after "head".
+ void _head_insert(LRUHandle* head, LRUHandle* handle);
+
void _resize();
};
@@ -336,15 +347,15 @@ namespace doris {
void _lru_remove(LRUHandle* e);
void _lru_append(LRUHandle* list, LRUHandle* e);
bool _unref(LRUHandle* e);
- void _evict_from_lru(size_t charge, std::vector<LRUHandle*>*
deleted);
+ void _evict_from_lru(size_t charge, LRUHandle** to_remove_head);
void _evict_one_entry(LRUHandle* e);
// Initialized before use.
- size_t _capacity;
+ size_t _capacity = 0;
// _mutex protects the following state.
Mutex _mutex;
- size_t _usage;
+ size_t _usage = 0;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
@@ -353,8 +364,8 @@ namespace doris {
HandleTable _table;
- uint64_t _lookup_count; // cache查找总次数
- uint64_t _hit_count; // 命中cache的总次数
+ uint64_t _lookup_count = 0; // cache查找总次数
+ uint64_t _hit_count = 0; // 命中cache的总次数
};
static const int kNumShardBits = 4;
@@ -378,6 +389,8 @@ namespace doris {
Slice value_slice(Handle* handle) override;
virtual uint64_t new_id();
virtual void prune();
+
+ private:
void update_cache_metrics() const;
private:
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index 537b696..a663e8f 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -299,6 +299,119 @@ TEST_F(CacheTest, NewId) {
ASSERT_NE(a, b);
}
+TEST_F(CacheTest, SimpleBenchmark) {
+ for (int i = 0; i < kCacheSize * 10000; i++) {
+ Insert(1000 + i, 2000 + i, 1);
+ ASSERT_EQ(2000 + i, Lookup(1000 + i));
+ }
+}
+
+TEST(CacheHandleTest, HandleTableTest) {
+ HandleTable ht;
+
+ for (uint32_t i = 0; i < ht._length; ++i) {
+ ASSERT_NE(ht._list[i], nullptr);
+ ASSERT_EQ(ht._list[i]->next_hash, nullptr);
+ ASSERT_EQ(ht._list[i]->prev_hash, nullptr);
+ }
+
+ const int count = 10;
+ CacheKey keys[count] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+ ASSERT_NE(keys[0], keys[1]);
+ LRUHandle* hs[count];
+ for (int i = 0; i < count; ++i) {
+ CacheKey* key = &keys[i];
+ LRUHandle* h = reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) -
1 + key->size()));
+ h->value = nullptr;
+ h->deleter = nullptr;
+ h->charge = 1;
+ h->key_length = key->size();
+ h->hash = 1; // make them in a same hash table linked-list
+ h->refs = 0;
+ h->next = h->prev = nullptr;
+ h->prev_hash = nullptr;
+ h->next_hash = nullptr;
+ h->in_cache = false;
+ h->priority = CachePriority::NORMAL;
+ memcpy(h->key_data, key->data(), key->size());
+
+ LRUHandle* old = ht.insert(h);
+ ASSERT_EQ(ht._elems, i + 1);
+ ASSERT_EQ(old, nullptr); // there is no entry with the same key and
hash
+ hs[i] = h;
+ }
+ ASSERT_EQ(ht._elems, count);
+ LRUHandle* h = ht.lookup(CacheKey(std::to_string(count - 1)), 1);
+ LRUHandle* head = ht._list[1 & (ht._length - 1)];
+ ASSERT_EQ(head, h->prev_hash);
+ ASSERT_EQ(head->next_hash, h);
+ int index = count - 1;
+ while (h != nullptr) {
+ ASSERT_EQ(hs[index], h) << index;
+ h = h->next_hash;
+ if (h != nullptr) {
+ ASSERT_EQ(hs[index], h->prev_hash);
+ }
+ --index;
+ }
+
+ for (int i = 0; i < count; ++i) {
+ CacheKey* key = &keys[i];
+ LRUHandle* h = reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) -
1 + key->size()));
+ h->value = nullptr;
+ h->deleter = nullptr;
+ h->charge = 1;
+ h->key_length = key->size();
+ h->hash = 1; // make them in a same hash table linked-list
+ h->refs = 0;
+ h->next = h->prev = nullptr;
+ h->prev_hash = nullptr;
+ h->next_hash = nullptr;
+ h->in_cache = false;
+ h->priority = CachePriority::NORMAL;
+ memcpy(h->key_data, key->data(), key->size());
+
+ ASSERT_EQ(ht.insert(h), hs[i]); // there is an entry with the same
key and hash
+ ASSERT_EQ(ht._elems, count);
+ free(hs[i]);
+ hs[i] = h;
+ }
+ ASSERT_EQ(ht._elems, count);
+
+ for (int i = 0; i < count; ++i) {
+ ASSERT_EQ(ht.lookup(keys[i], 1), hs[i]);
+ }
+
+ LRUHandle* old = ht.remove(CacheKey("9"), 1); // first in hash table
linked-list
+ ASSERT_EQ(old, hs[9]);
+ ASSERT_EQ(old->prev_hash, head);
+ ASSERT_EQ(old->next_hash, hs[8]); // hs[8] is the new first node
+ ASSERT_EQ(head->next_hash, hs[8]);
+ ASSERT_EQ(hs[8]->prev_hash, head);
+
+ old = ht.remove(CacheKey("0"), 1); // last in hash table linked-list
+ ASSERT_EQ(old, hs[0]);
+ ASSERT_EQ(old->prev_hash, hs[1]); // hs[1] is the new last node
+ ASSERT_EQ(old->prev_hash->next_hash, nullptr);
+
+ old = ht.remove(CacheKey("5"), 1); // middle in hash table linked-list
+ ASSERT_EQ(old, hs[5]);
+ ASSERT_EQ(old->prev_hash, hs[6]);
+ ASSERT_EQ(old->next_hash, hs[4]);
+ ASSERT_EQ(hs[6]->next_hash, hs[4]);
+ ASSERT_EQ(hs[4]->prev_hash, hs[6]);
+
+ ht.remove(hs[4]); // middle in hash table linked-list
+ ASSERT_EQ(hs[6]->next_hash, hs[3]);
+ ASSERT_EQ(hs[3]->prev_hash, hs[6]);
+
+ ASSERT_EQ(ht._elems, count - 4);
+
+ for (int i = 0; i < count; ++i) {
+ free(hs[i]);
+ }
+}
+
} // namespace doris
int main(int argc, char** argv) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]