[incubator-doris] branch master updated: [Optimize] Improve LRU cache's performance (#4781)

morningman Thu, 05 Nov 2020 18:56:46 -0800

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git



The following commit(s) were added to refs/heads/master by this push:
     new f40868a  [Optimize] Improve LRU cache's performance (#4781)
f40868a is described below

commit f40868a4805f0ba503bcca9a9c04f704b61c121b
Author: Yingchun Lai <[email protected]>
AuthorDate: Fri Nov 6 10:56:27 2020 +0800

    [Optimize] Improve LRU cache's performance (#4781)
    
    When LRUCache insert and evict a large number of entries, there are
    frequently calls of HandleTable::remove(e->key, e->hash), it will
    lookup the entry in the hash table. Now that we know the entry to
    remove 'e', we can remove it directly from hash table's collision list
    if it's a double linked list.
    This patch refactor the collision list to double linked list, the simple
    benchmark CacheTest.SimpleBenchmark shows that time cost reduced about
    18% in my test environment.
---
 be/src/olap/lru_cache.cpp       | 121 ++++++++++++++++++++++++++--------------
 be/src/olap/lru_cache.h         |  35 ++++++++----
 be/test/olap/lru_cache_test.cpp | 113 +++++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+), 53 deletions(-)

diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index dd68b13..1a93a21 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -73,20 +73,26 @@ uint32_t CacheKey::hash(const char* data, size_t n, 
uint32_t seed) const {
 Cache::~Cache() {
 }
 
+HandleTable::~HandleTable() {
+    for (uint32_t i = 0; i < _length; i++) {
+        delete _list[i];
+    }
+    delete[] _list;
+}
+
 // LRU cache implementation
 LRUHandle* HandleTable::lookup(const CacheKey& key, uint32_t hash) {
     return *_find_pointer(key, hash);
 }
 
 LRUHandle* HandleTable::insert(LRUHandle* h) {
-    LRUHandle** ptr = _find_pointer(h->key(), h->hash);
-    LRUHandle* old = *ptr;
-    h->next_hash = (old == NULL ? NULL : old->next_hash);
-    *ptr = h;
+    LRUHandle* old = remove(h->key(), h->hash);
+    LRUHandle* head = _list[h->hash & (_length - 1)];
 
-    if (old == NULL) {
-        ++_elems;
+    _head_insert(head, h);
+    ++_elems;
 
+    if (old == NULL) {
         if (_elems > _length) {
             // Since each cache entry is fairly large, we aim for a small
             // average linked list length (<= 1).
@@ -101,17 +107,24 @@ LRUHandle* HandleTable::remove(const CacheKey& key, 
uint32_t hash) {
     LRUHandle** ptr = _find_pointer(key, hash);
     LRUHandle* result = *ptr;
 
-    if (result != NULL) {
-        *ptr = result->next_hash;
-        --_elems;
-    }
+    remove(result);
 
     return result;
 }
 
-LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) {
-    LRUHandle** ptr = &_list[hash & (_length - 1)];
+void HandleTable::remove(const LRUHandle* h) {
+    if (h != nullptr) {
+        if (h->next_hash != nullptr) {
+            h->next_hash->prev_hash = h->prev_hash;
+        }
+        DCHECK(h->prev_hash != nullptr);
+        h->prev_hash->next_hash = h->next_hash;
+        --_elems;
+    }
+}
 
+LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) {
+    LRUHandle** ptr = &(_list[hash & (_length - 1)]->next_hash);
     while (*ptr != NULL &&
             ((*ptr)->hash != hash || key != (*ptr)->key())) {
         ptr = &(*ptr)->next_hash;
@@ -120,42 +133,56 @@ LRUHandle** HandleTable::_find_pointer(const CacheKey& 
key, uint32_t hash) {
     return ptr;
 }
 
-void HandleTable::_resize() {
-    uint32_t new_length = 4;
+void HandleTable::_head_insert(LRUHandle* head, LRUHandle* handle) {
+    handle->next_hash = head->next_hash;
+    if (handle->next_hash != nullptr) {
+        handle->next_hash->prev_hash = handle;
+    }
+    handle->prev_hash = head;
+    head->next_hash = handle;
+}
 
-    while (new_length < _elems) {
+void HandleTable::_resize() {
+    uint32_t new_length = 16;
+    while (new_length < _elems * 1.5) {
         new_length *= 2;
     }
 
     LRUHandle** new_list = new(std::nothrow) LRUHandle*[new_length];
     memset(new_list, 0, sizeof(new_list[0]) * new_length);
-    uint32_t count = 0;
+    for (uint32_t i = 0; i < new_length; i++) {
+        // The first node in the linked-list is a dummy node used for
+        // inserting new node mainly.
+        new_list[i] = new LRUHandle();
+    }
 
+    uint32_t count = 0;
     for (uint32_t i = 0; i < _length; i++) {
-        LRUHandle* h = _list[i];
+        LRUHandle* h = _list[i]->next_hash;
         while (h != NULL) {
             LRUHandle* next = h->next_hash;
             uint32_t hash = h->hash;
-            LRUHandle** ptr = &new_list[hash & (new_length - 1)];
-            h->next_hash = *ptr;
-            *ptr = h;
+            LRUHandle* head = new_list[hash & (new_length - 1)];
+            _head_insert(head, h);
             h = next;
             count++;
         }
     }
 
     DCHECK_EQ(_elems, count);
+    for (uint32_t i = 0; i < _length; i++) {
+        delete _list[i];
+    }
     delete [] _list;
     _list = new_list;
     _length = new_length;
 }
 
-LRUCache::LRUCache() : _usage(0), _lookup_count(0),
-    _hit_count(0) {
-        // Make empty circular linked list
-        _lru.next = &_lru;
-        _lru.prev = &_lru;
-    }
+LRUCache::LRUCache() {
+    // Make empty circular linked list
+    _lru.next = &_lru;
+    _lru.prev = &_lru;
+}
 
 LRUCache::~LRUCache() {
     prune();
@@ -213,7 +240,7 @@ void LRUCache::release(Cache::Handle* handle) {
             // only exists in cache
             if (_usage > _capacity) {
                 // take this opportunity and remove the item
-                _table.remove(e->key(), e->hash);
+                _table.remove(e);
                 e->in_cache = false;
                 _unref(e);
                 _usage -= e->charge;
@@ -231,7 +258,7 @@ void LRUCache::release(Cache::Handle* handle) {
     }
 }
 
-void LRUCache::_evict_from_lru(size_t charge, std::vector<LRUHandle*>* 
deleted) {
+void LRUCache::_evict_from_lru(size_t charge, LRUHandle** to_remove_head) {
     LRUHandle* cur = &_lru;
     // 1. evict normal cache entries
     while (_usage + charge > _capacity && cur->next != &_lru) {
@@ -241,14 +268,16 @@ void LRUCache::_evict_from_lru(size_t charge, 
std::vector<LRUHandle*>* deleted)
             continue;
         }
         _evict_one_entry(old);
-        deleted->push_back(old);
+        old->next = *to_remove_head;
+        *to_remove_head = old;
     }
     // 2. evict durable cache entries if need
     while (_usage + charge > _capacity && _lru.next != &_lru) {
         LRUHandle* old = _lru.next;
         DCHECK(old->priority == CachePriority::DURABLE);
         _evict_one_entry(old);
-        deleted->push_back(old);
+        old->next = *to_remove_head;
+        *to_remove_head = old;
     }
 }
 
@@ -256,7 +285,7 @@ void LRUCache::_evict_one_entry(LRUHandle* e) {
     DCHECK(e->in_cache);
     DCHECK(e->refs == 1); // LRU list contains elements which may be evicted
     _lru_remove(e);
-    _table.remove(e->key(), e->hash);
+    _table.remove(e);
     e->in_cache = false;
     _unref(e);
     _usage -= e->charge;
@@ -279,13 +308,13 @@ Cache::Handle* LRUCache::insert(
     e->in_cache = true;
     e->priority = priority;
     memcpy(e->key_data, key.data(), key.size());
-    std::vector<LRUHandle*> last_ref_list;
+    LRUHandle* to_remove_head = nullptr;
     {
         MutexLock l(&_mutex);
 
         // Free the space following strict LRU policy until enough space
         // is freed or the lru list is empty
-        _evict_from_lru(charge, &last_ref_list);
+        _evict_from_lru(charge, &to_remove_head);
 
         // insert into the cache
         // note that the cache might get larger than its capacity if not enough
@@ -299,15 +328,18 @@ Cache::Handle* LRUCache::insert(
                 // old is on LRU because it's in cache and its reference count
                 // was just 1 (Unref returned 0)
                 _lru_remove(old);
-                last_ref_list.push_back(old);
+                old->next = to_remove_head;
+                to_remove_head = old;
             }
         }
     }
 
     // we free the entries here outside of mutex for
     // performance reasons
-    for (auto entry : last_ref_list) {
-        entry->free();
+    while (to_remove_head != nullptr) {
+        LRUHandle* next = to_remove_head->next;
+        to_remove_head->free();
+        to_remove_head = next;
     }
 
     return reinterpret_cast<Cache::Handle*>(e);
@@ -338,7 +370,7 @@ void LRUCache::erase(const CacheKey& key, uint32_t hash) {
 }
 
 int LRUCache::prune() {
-    std::vector<LRUHandle*> last_ref_list;
+    LRUHandle* to_remove_head = nullptr;
     {
         MutexLock l(&_mutex);
         while (_lru.next != &_lru) {
@@ -346,17 +378,22 @@ int LRUCache::prune() {
             DCHECK(old->in_cache);
             DCHECK(old->refs == 1);  // LRU list contains elements which may 
be evicted
             _lru_remove(old);
-            _table.remove(old->key(), old->hash);
+            _table.remove(old);
             old->in_cache = false;
             _unref(old);
             _usage -= old->charge;
-            last_ref_list.push_back(old);
+            old->next = to_remove_head;
+            to_remove_head = old;
         }
     }
-    for (auto entry : last_ref_list) {
-        entry->free();
+    int pruned_count = 0;
+    while (to_remove_head != nullptr) {
+        ++pruned_count;
+        LRUHandle* next = to_remove_head->next;
+        to_remove_head->free();
+        to_remove_head = next;
     }
-    return last_ref_list.size();
+    return pruned_count;
 }
 
 inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) {
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index b57c82b..3e483a1 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -11,6 +11,7 @@
 #include <string>
 #include <vector>
 
+#include <gtest/gtest_prod.h>
 #include <rapidjson/document.h>
 
 #include "olap/olap_common.h"
@@ -231,9 +232,10 @@ namespace doris {
     typedef struct LRUHandle {
         void* value;
         void (*deleter)(const CacheKey&, void* value);
-        LRUHandle* next_hash;  // next entry in hash table
-        LRUHandle* next;       // next entry in lru list
-        LRUHandle* prev;       // previous entry in lru list
+        LRUHandle* next_hash = nullptr;  // next entry in hash table
+        LRUHandle* prev_hash = nullptr;  // previous entry in hash table
+        LRUHandle* next = nullptr;       // next entry in lru list
+        LRUHandle* prev = nullptr;       // previous entry in lru list
         size_t charge;
         size_t key_length;
         bool in_cache;      // Whether entry is in the cache.
@@ -271,17 +273,22 @@ namespace doris {
                 _resize();
             }
 
-            ~HandleTable() {
-                delete[] _list;
-            }
+            ~HandleTable();
 
             LRUHandle* lookup(const CacheKey& key, uint32_t hash);
 
             LRUHandle* insert(LRUHandle* h);
 
+            // Remove element from hash table by "key" and "hash".
             LRUHandle* remove(const CacheKey& key, uint32_t hash);
 
+            // Remove element from hash table by "h", it would be faster
+            // than the function above.
+            void remove(const LRUHandle* h);
+
         private:
+            FRIEND_TEST(CacheTest, HandleTableTest);
+
             // The tablet consists of an array of buckets where each bucket is
             // a linked list of cache entries that hash into the bucket.
             uint32_t _length;
@@ -292,6 +299,10 @@ namespace doris {
             // matches key/hash.  If there is no such cache entry, return a
             // pointer to the trailing slot in the corresponding linked list.
             LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
+
+            // Insert "handle" after "head".
+            void _head_insert(LRUHandle* head, LRUHandle* handle);
+
             void _resize();
     };
 
@@ -336,15 +347,15 @@ namespace doris {
             void _lru_remove(LRUHandle* e);
             void _lru_append(LRUHandle* list, LRUHandle* e);
             bool _unref(LRUHandle* e);
-            void _evict_from_lru(size_t charge, std::vector<LRUHandle*>* 
deleted);
+            void _evict_from_lru(size_t charge, LRUHandle** to_remove_head);
             void _evict_one_entry(LRUHandle* e);
 
             // Initialized before use.
-            size_t _capacity;
+            size_t _capacity = 0;
 
             // _mutex protects the following state.
             Mutex _mutex;
-            size_t _usage;
+            size_t _usage = 0;
 
             // Dummy head of LRU list.
             // lru.prev is newest entry, lru.next is oldest entry.
@@ -353,8 +364,8 @@ namespace doris {
 
             HandleTable _table;
 
-            uint64_t _lookup_count;    // cache查找总次数
-            uint64_t _hit_count;       // 命中cache的总次数
+            uint64_t _lookup_count = 0;    // cache查找总次数
+            uint64_t _hit_count = 0;       // 命中cache的总次数
     };
 
     static const int kNumShardBits = 4;
@@ -378,6 +389,8 @@ namespace doris {
             Slice value_slice(Handle* handle) override;
             virtual uint64_t new_id();
             virtual void prune();
+
+        private:
             void update_cache_metrics() const;
 
         private:
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index 537b696..a663e8f 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -299,6 +299,119 @@ TEST_F(CacheTest, NewId) {
     ASSERT_NE(a, b);
 }
 
+TEST_F(CacheTest, SimpleBenchmark) {
+    for (int i = 0; i < kCacheSize * 10000; i++) {
+        Insert(1000 + i, 2000 + i, 1);
+        ASSERT_EQ(2000 + i, Lookup(1000 + i));
+    }
+}
+
+TEST(CacheHandleTest, HandleTableTest) {
+    HandleTable ht;
+
+    for (uint32_t i = 0; i < ht._length; ++i) {
+        ASSERT_NE(ht._list[i], nullptr);
+        ASSERT_EQ(ht._list[i]->next_hash, nullptr);
+        ASSERT_EQ(ht._list[i]->prev_hash, nullptr);
+    }
+
+    const int count = 10;
+    CacheKey keys[count] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+    ASSERT_NE(keys[0], keys[1]);
+    LRUHandle* hs[count];
+    for (int i = 0; i < count; ++i) {
+        CacheKey* key = &keys[i];
+        LRUHandle* h = reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 
1 + key->size()));
+        h->value = nullptr;
+        h->deleter = nullptr;
+        h->charge = 1;
+        h->key_length = key->size();
+        h->hash = 1;  // make them in a same hash table linked-list
+        h->refs = 0;
+        h->next = h->prev = nullptr;
+        h->prev_hash = nullptr;
+        h->next_hash = nullptr;
+        h->in_cache = false;
+        h->priority = CachePriority::NORMAL;
+        memcpy(h->key_data, key->data(), key->size());
+
+        LRUHandle* old = ht.insert(h);
+        ASSERT_EQ(ht._elems, i + 1);
+        ASSERT_EQ(old, nullptr);  // there is no entry with the same key and 
hash
+        hs[i] = h;
+    }
+    ASSERT_EQ(ht._elems, count);
+    LRUHandle* h = ht.lookup(CacheKey(std::to_string(count - 1)), 1);
+    LRUHandle* head = ht._list[1 & (ht._length - 1)];
+    ASSERT_EQ(head, h->prev_hash);
+    ASSERT_EQ(head->next_hash, h);
+    int index = count - 1;
+    while (h != nullptr) {
+        ASSERT_EQ(hs[index], h) << index;
+        h = h->next_hash;
+        if (h != nullptr) {
+            ASSERT_EQ(hs[index], h->prev_hash);
+        }
+        --index;
+    }
+
+    for (int i = 0; i < count; ++i) {
+        CacheKey* key = &keys[i];
+        LRUHandle* h = reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 
1 + key->size()));
+        h->value = nullptr;
+        h->deleter = nullptr;
+        h->charge = 1;
+        h->key_length = key->size();
+        h->hash = 1;  // make them in a same hash table linked-list
+        h->refs = 0;
+        h->next = h->prev = nullptr;
+        h->prev_hash = nullptr;
+        h->next_hash = nullptr;
+        h->in_cache = false;
+        h->priority = CachePriority::NORMAL;
+        memcpy(h->key_data, key->data(), key->size());
+
+        ASSERT_EQ(ht.insert(h), hs[i]);  // there is an entry with the same 
key and hash
+        ASSERT_EQ(ht._elems, count);
+        free(hs[i]);
+        hs[i] = h;
+    }
+    ASSERT_EQ(ht._elems, count);
+
+    for (int i = 0; i < count; ++i) {
+        ASSERT_EQ(ht.lookup(keys[i], 1), hs[i]);
+    }
+
+    LRUHandle* old = ht.remove(CacheKey("9"), 1);  // first in hash table 
linked-list
+    ASSERT_EQ(old, hs[9]);
+    ASSERT_EQ(old->prev_hash, head);
+    ASSERT_EQ(old->next_hash, hs[8]);   // hs[8] is the new first node
+    ASSERT_EQ(head->next_hash, hs[8]);
+    ASSERT_EQ(hs[8]->prev_hash, head);
+
+    old = ht.remove(CacheKey("0"), 1);  // last in hash table linked-list
+    ASSERT_EQ(old, hs[0]);
+    ASSERT_EQ(old->prev_hash, hs[1]);   // hs[1] is the new last node
+    ASSERT_EQ(old->prev_hash->next_hash, nullptr);
+
+    old = ht.remove(CacheKey("5"), 1);  // middle in hash table linked-list
+    ASSERT_EQ(old, hs[5]);
+    ASSERT_EQ(old->prev_hash, hs[6]);
+    ASSERT_EQ(old->next_hash, hs[4]);
+    ASSERT_EQ(hs[6]->next_hash, hs[4]);
+    ASSERT_EQ(hs[4]->prev_hash, hs[6]);
+
+    ht.remove(hs[4]);  // middle in hash table linked-list
+    ASSERT_EQ(hs[6]->next_hash, hs[3]);
+    ASSERT_EQ(hs[3]->prev_hash, hs[6]);
+
+    ASSERT_EQ(ht._elems, count - 4);
+
+    for (int i = 0; i < count; ++i) {
+        free(hs[i]);
+    }
+}
+
 }  // namespace doris
 
 int main(int argc, char** argv) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[incubator-doris] branch master updated: [Optimize] Improve LRU cache's performance (#4781)

Reply via email to