This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2929a96224 [Refactor](inverted index cache) Use asc set instead of 
priority queue at the lru cache (#18033)
2929a96224 is described below

commit 2929a962244558a5e23fc105e9f57b30ebcda2a0
Author: YueW <[email protected]>
AuthorDate: Mon Mar 27 10:27:37 2023 +0800

    [Refactor](inverted index cache) Use asc set instead of priority queue at 
the lru cache (#18033)
    
    use asc set instead of priority queue at the LRU cache, to keep the 
lifecycle of the LRUHandle consistent in the sorted set and the LRU free list
---
 be/src/olap/lru_cache.cpp                          | 79 +++++++++++-----------
 be/src/olap/lru_cache.h                            | 15 ++--
 .../inverted_index_searcher_cache_test.cpp         | 68 +++++++++++++++++++
 3 files changed, 114 insertions(+), 48 deletions(-)

diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 57d7500b4a..529f75ec69 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -189,6 +189,22 @@ void LRUCache::_lru_remove(LRUHandle* e) {
     e->next->prev = e->prev;
     e->prev->next = e->next;
     e->prev = e->next = nullptr;
+
+    if (_cache_value_check_timestamp) {
+        if (e->priority == CachePriority::NORMAL) {
+            auto pair = std::make_pair(_cache_value_time_extractor(e->value), 
e);
+            auto found_it = _sorted_normal_entries_with_timestamp.find(pair);
+            if (found_it != _sorted_normal_entries_with_timestamp.end()) {
+                _sorted_normal_entries_with_timestamp.erase(found_it);
+            }
+        } else if (e->priority == CachePriority::DURABLE) {
+            auto pair = std::make_pair(_cache_value_time_extractor(e->value), 
e);
+            auto found_it = _sorted_durable_entries_with_timestamp.find(pair);
+            if (found_it != _sorted_durable_entries_with_timestamp.end()) {
+                _sorted_durable_entries_with_timestamp.erase(found_it);
+            }
+        }
+    }
 }
 
 void LRUCache::_lru_append(LRUHandle* list, LRUHandle* e) {
@@ -197,6 +213,24 @@ void LRUCache::_lru_append(LRUHandle* list, LRUHandle* e) {
     e->prev = list->prev;
     e->prev->next = e;
     e->next->prev = e;
+
+    // _cache_value_check_timestamp is true,
+    // means evict entry will depends on the timestamp asc set,
+    // the timestamp is updated by higher level caller,
+    // and the timestamp of hit entry is different with the insert entry,
+    // that is why need check timestamp to evict entry,
+    // in order to keep the survival time of hit entries
+    // longer than the entries just inserted,
+    // so use asc set to sorted these entries's timestamp and LRUHandle*
+    if (_cache_value_check_timestamp) {
+        if (e->priority == CachePriority::NORMAL) {
+            _sorted_normal_entries_with_timestamp.insert(
+                    std::make_pair(_cache_value_time_extractor(e->value), e));
+        } else if (e->priority == CachePriority::DURABLE) {
+            _sorted_durable_entries_with_timestamp.insert(
+                    std::make_pair(_cache_value_time_extractor(e->value), e));
+        }
+    }
 }
 
 Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) {
@@ -244,23 +278,6 @@ void LRUCache::release(Cache::Handle* handle) {
                 } else if (e->priority == CachePriority::DURABLE) {
                     _lru_append(&_lru_durable, e);
                 }
-                // _cache_value_check_timestamp is true,
-                // means evict entry will depends on the timestamp sequence,
-                // the timestamp is updated by higher level caller,
-                // and the timestamp of hit entry is different with the insert 
entry,
-                // that is why need check timestamp to evict entry,
-                // in order to keep the survival time of hit entries
-                // longer than the entries just inserted,
-                // so use priority_queue to sorted these entries's timestamp 
and LRUHandle*
-                if (_cache_value_check_timestamp) {
-                    if (e->priority == CachePriority::NORMAL) {
-                        _sorted_normal_entries_with_timestamp.push(
-                                
std::make_pair(_cache_value_time_extractor(e->value), e));
-                    } else if (e->priority == CachePriority::DURABLE) {
-                        _sorted_durable_entries_with_timestamp.push(
-                                
std::make_pair(_cache_value_time_extractor(e->value), e));
-                    }
-                }
             }
         }
     }
@@ -275,41 +292,25 @@ void LRUCache::_evict_from_lru_with_time(size_t 
total_size, LRUHandle** to_remov
     // 1. evict normal cache entries
     while ((_usage + total_size > _capacity || _check_element_count_limit()) &&
            !_sorted_normal_entries_with_timestamp.empty()) {
-        auto entry_pair = _sorted_normal_entries_with_timestamp.top();
-        LRUHandle* remove_handle = entry_pair.second;
-        if (_cache_value_time_extractor(remove_handle->value) != 
entry_pair.first) {
-            // Time in cache value maybe updated when higher level call 
LRUCache::release(),
-            // get time by _cache_value_time_extractor is the latest.
-            // Because remove element can only pop from the priority_queue 
header,
-            // so old <timestamp, LRUHandle*> keep in the priority_queue until 
pop it here.
-            _sorted_normal_entries_with_timestamp.pop();
-            continue;
-        }
+        auto entry_pair = _sorted_normal_entries_with_timestamp.begin();
+        LRUHandle* remove_handle = entry_pair->second;
+        DCHECK(remove_handle != nullptr);
         DCHECK(remove_handle->priority == CachePriority::NORMAL);
         _evict_one_entry(remove_handle);
         remove_handle->next = *to_remove_head;
         *to_remove_head = remove_handle;
-        _sorted_normal_entries_with_timestamp.pop();
     }
 
     // 2. evict durable cache entries if need
     while ((_usage + total_size > _capacity || _check_element_count_limit()) &&
            !_sorted_durable_entries_with_timestamp.empty()) {
-        auto entry_pair = _sorted_durable_entries_with_timestamp.top();
-        LRUHandle* remove_handle = entry_pair.second;
-        if (_cache_value_time_extractor(remove_handle->value) != 
entry_pair.first) {
-            // Time in cache value maybe updated when higher level call 
LRUCache::release(),
-            // get time by _cache_value_time_extractor is the latest.
-            // Because remove element can only pop from the priority_queue 
header,
-            // so old <timestamp, LRUHandle*> keep in the priority_queue until 
pop it here.
-            _sorted_durable_entries_with_timestamp.pop();
-            continue;
-        }
+        auto entry_pair = _sorted_durable_entries_with_timestamp.begin();
+        LRUHandle* remove_handle = entry_pair->second;
+        DCHECK(remove_handle != nullptr);
         DCHECK(remove_handle->priority == CachePriority::DURABLE);
         _evict_one_entry(remove_handle);
         remove_handle->next = *to_remove_head;
         *to_remove_head = remove_handle;
-        _sorted_durable_entries_with_timestamp.pop();
     }
 }
 
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 8226ecc858..c479c4e443 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -314,13 +314,10 @@ private:
     void _resize();
 };
 
-// pair first is timestatmp, put <timestatmp, LRUHandle*> into asc 
priority_queue,
-// when need to free space, can first evict the top of the LRUHandleHeap,
-// because the top element's timestamp is the oldest.
-typedef std::priority_queue<std::pair<int64_t, LRUHandle*>,
-                            std::vector<std::pair<int64_t, LRUHandle*>>,
-                            std::greater<std::pair<int64_t, LRUHandle*>>>
-        LRUHandleHeap;
+// pair first is timestatmp, put <timestatmp, LRUHandle*> into asc set,
+// when need to free space, can first evict the begin of the set,
+// because the begin element's timestamp is the oldest.
+using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
 
 // A single shard of sharded cache.
 class LRUCache {
@@ -386,8 +383,8 @@ private:
 
     CacheValueTimeExtractor _cache_value_time_extractor;
     bool _cache_value_check_timestamp = false;
-    LRUHandleHeap _sorted_normal_entries_with_timestamp;
-    LRUHandleHeap _sorted_durable_entries_with_timestamp;
+    LRUHandleSortedSet _sorted_normal_entries_with_timestamp;
+    LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
 
     uint32_t _element_count_capacity = 0;
 };
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
index f0fca74b19..87ebd80664 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
@@ -296,5 +296,73 @@ TEST_F(InvertedIndexSearcherCacheTest, 
evict_by_element_count_limit) {
     delete index_searcher_cache;
 }
 
+TEST_F(InvertedIndexSearcherCacheTest, remove_element_only_in_table) {
+    InvertedIndexSearcherCache* index_searcher_cache =
+            new InvertedIndexSearcherCache(kCacheSize, 1);
+    // no need evict
+    std::string file_name_1 = "test_1.idx";
+    InvertedIndexSearcherCache::CacheKey key_1(file_name_1);
+    IndexCacheValuePtr cache_value_1 = 
std::make_unique<InvertedIndexSearcherCache::CacheValue>();
+    cache_value_1->size = 200;
+    cache_value_1->index_searcher = nullptr;
+    cache_value_1->last_visit_time = 10;
+    index_searcher_cache->_cache->release(
+            index_searcher_cache->_insert(key_1, cache_value_1.release()));
+
+    std::string file_name_2 = "test_2.idx";
+    InvertedIndexSearcherCache::CacheKey key_2(file_name_2);
+    IndexCacheValuePtr cache_value_2 = 
std::make_unique<InvertedIndexSearcherCache::CacheValue>();
+
+    // lookup key_1, insert key_2, release key_2, release key_1
+    {
+        InvertedIndexCacheHandle cache_handle;
+        // lookup key_1
+        EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle));
+
+        // insert key_2, and then evict {key_1, cache_value_1}
+        cache_value_2->size = 800;
+        cache_value_2->index_searcher = nullptr;
+        cache_value_2->last_visit_time = 20;
+        index_searcher_cache->_cache->release(
+                index_searcher_cache->_insert(key_2, cache_value_2.release()));
+
+        // lookup key_2, key_2 has removed from table due to cache is full
+        {
+            InvertedIndexCacheHandle cache_handle;
+            EXPECT_FALSE(index_searcher_cache->_lookup(key_2, &cache_handle));
+        }
+    }
+
+    // lookup key_1 exist
+    {
+        InvertedIndexCacheHandle cache_handle;
+        EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle));
+    }
+
+    // lookup key_2 not exist, then insert into cache, and evict key_1
+    OlapReaderStatistics stats;
+    {
+        InvertedIndexCacheHandle inverted_index_cache_handle;
+        auto status = index_searcher_cache->get_index_searcher(
+                fs, kTestDir, file_name_2, &inverted_index_cache_handle, 
&stats);
+        EXPECT_EQ(Status::OK(), status);
+        EXPECT_FALSE(inverted_index_cache_handle.owned);
+    }
+    // lookup key_2 again
+    {
+        InvertedIndexCacheHandle inverted_index_cache_handle;
+        auto status = index_searcher_cache->get_index_searcher(
+                fs, kTestDir, file_name_2, &inverted_index_cache_handle, 
&stats);
+        EXPECT_EQ(Status::OK(), status);
+        EXPECT_FALSE(inverted_index_cache_handle.owned);
+        auto cache_value_use_cache =
+                
(InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle._cache)
+                        ->value(inverted_index_cache_handle._handle);
+        EXPECT_LT(UnixMillis(), cache_value_use_cache->last_visit_time);
+    }
+
+    delete index_searcher_cache;
+}
+
 } // namespace segment_v2
 } // namespace doris
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to