This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2929a96224 [Refactor](inverted index cache) Use asc set instead of
priority queue at the lru cache (#18033)
2929a96224 is described below
commit 2929a962244558a5e23fc105e9f57b30ebcda2a0
Author: YueW <[email protected]>
AuthorDate: Mon Mar 27 10:27:37 2023 +0800
[Refactor](inverted index cache) Use asc set instead of priority queue at
the lru cache (#18033)
use asc set instead of priority queue at the LRU cache, to keep the
lifecycle of the LRUHandle consistent in the sorted set and the LRU free list
---
be/src/olap/lru_cache.cpp | 79 +++++++++++-----------
be/src/olap/lru_cache.h | 15 ++--
.../inverted_index_searcher_cache_test.cpp | 68 +++++++++++++++++++
3 files changed, 114 insertions(+), 48 deletions(-)
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 57d7500b4a..529f75ec69 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -189,6 +189,22 @@ void LRUCache::_lru_remove(LRUHandle* e) {
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
+
+ if (_cache_value_check_timestamp) {
+ if (e->priority == CachePriority::NORMAL) {
+ auto pair = std::make_pair(_cache_value_time_extractor(e->value),
e);
+ auto found_it = _sorted_normal_entries_with_timestamp.find(pair);
+ if (found_it != _sorted_normal_entries_with_timestamp.end()) {
+ _sorted_normal_entries_with_timestamp.erase(found_it);
+ }
+ } else if (e->priority == CachePriority::DURABLE) {
+ auto pair = std::make_pair(_cache_value_time_extractor(e->value),
e);
+ auto found_it = _sorted_durable_entries_with_timestamp.find(pair);
+ if (found_it != _sorted_durable_entries_with_timestamp.end()) {
+ _sorted_durable_entries_with_timestamp.erase(found_it);
+ }
+ }
+ }
}
void LRUCache::_lru_append(LRUHandle* list, LRUHandle* e) {
@@ -197,6 +213,24 @@ void LRUCache::_lru_append(LRUHandle* list, LRUHandle* e) {
e->prev = list->prev;
e->prev->next = e;
e->next->prev = e;
+
+ // _cache_value_check_timestamp is true,
+ // means evict entry will depends on the timestamp asc set,
+ // the timestamp is updated by higher level caller,
+ // and the timestamp of hit entry is different with the insert entry,
+ // that is why need check timestamp to evict entry,
+ // in order to keep the survival time of hit entries
+ // longer than the entries just inserted,
+ // so use asc set to sorted these entries's timestamp and LRUHandle*
+ if (_cache_value_check_timestamp) {
+ if (e->priority == CachePriority::NORMAL) {
+ _sorted_normal_entries_with_timestamp.insert(
+ std::make_pair(_cache_value_time_extractor(e->value), e));
+ } else if (e->priority == CachePriority::DURABLE) {
+ _sorted_durable_entries_with_timestamp.insert(
+ std::make_pair(_cache_value_time_extractor(e->value), e));
+ }
+ }
}
Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) {
@@ -244,23 +278,6 @@ void LRUCache::release(Cache::Handle* handle) {
} else if (e->priority == CachePriority::DURABLE) {
_lru_append(&_lru_durable, e);
}
- // _cache_value_check_timestamp is true,
- // means evict entry will depends on the timestamp sequence,
- // the timestamp is updated by higher level caller,
- // and the timestamp of hit entry is different with the insert
entry,
- // that is why need check timestamp to evict entry,
- // in order to keep the survival time of hit entries
- // longer than the entries just inserted,
- // so use priority_queue to sorted these entries's timestamp
and LRUHandle*
- if (_cache_value_check_timestamp) {
- if (e->priority == CachePriority::NORMAL) {
- _sorted_normal_entries_with_timestamp.push(
-
std::make_pair(_cache_value_time_extractor(e->value), e));
- } else if (e->priority == CachePriority::DURABLE) {
- _sorted_durable_entries_with_timestamp.push(
-
std::make_pair(_cache_value_time_extractor(e->value), e));
- }
- }
}
}
}
@@ -275,41 +292,25 @@ void LRUCache::_evict_from_lru_with_time(size_t
total_size, LRUHandle** to_remov
// 1. evict normal cache entries
while ((_usage + total_size > _capacity || _check_element_count_limit()) &&
!_sorted_normal_entries_with_timestamp.empty()) {
- auto entry_pair = _sorted_normal_entries_with_timestamp.top();
- LRUHandle* remove_handle = entry_pair.second;
- if (_cache_value_time_extractor(remove_handle->value) !=
entry_pair.first) {
- // Time in cache value maybe updated when higher level call
LRUCache::release(),
- // get time by _cache_value_time_extractor is the latest.
- // Because remove element can only pop from the priority_queue
header,
- // so old <timestamp, LRUHandle*> keep in the priority_queue until
pop it here.
- _sorted_normal_entries_with_timestamp.pop();
- continue;
- }
+ auto entry_pair = _sorted_normal_entries_with_timestamp.begin();
+ LRUHandle* remove_handle = entry_pair->second;
+ DCHECK(remove_handle != nullptr);
DCHECK(remove_handle->priority == CachePriority::NORMAL);
_evict_one_entry(remove_handle);
remove_handle->next = *to_remove_head;
*to_remove_head = remove_handle;
- _sorted_normal_entries_with_timestamp.pop();
}
// 2. evict durable cache entries if need
while ((_usage + total_size > _capacity || _check_element_count_limit()) &&
!_sorted_durable_entries_with_timestamp.empty()) {
- auto entry_pair = _sorted_durable_entries_with_timestamp.top();
- LRUHandle* remove_handle = entry_pair.second;
- if (_cache_value_time_extractor(remove_handle->value) !=
entry_pair.first) {
- // Time in cache value maybe updated when higher level call
LRUCache::release(),
- // get time by _cache_value_time_extractor is the latest.
- // Because remove element can only pop from the priority_queue
header,
- // so old <timestamp, LRUHandle*> keep in the priority_queue until
pop it here.
- _sorted_durable_entries_with_timestamp.pop();
- continue;
- }
+ auto entry_pair = _sorted_durable_entries_with_timestamp.begin();
+ LRUHandle* remove_handle = entry_pair->second;
+ DCHECK(remove_handle != nullptr);
DCHECK(remove_handle->priority == CachePriority::DURABLE);
_evict_one_entry(remove_handle);
remove_handle->next = *to_remove_head;
*to_remove_head = remove_handle;
- _sorted_durable_entries_with_timestamp.pop();
}
}
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 8226ecc858..c479c4e443 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -314,13 +314,10 @@ private:
void _resize();
};
-// pair first is timestatmp, put <timestatmp, LRUHandle*> into asc
priority_queue,
-// when need to free space, can first evict the top of the LRUHandleHeap,
-// because the top element's timestamp is the oldest.
-typedef std::priority_queue<std::pair<int64_t, LRUHandle*>,
- std::vector<std::pair<int64_t, LRUHandle*>>,
- std::greater<std::pair<int64_t, LRUHandle*>>>
- LRUHandleHeap;
+// pair first is timestatmp, put <timestatmp, LRUHandle*> into asc set,
+// when need to free space, can first evict the begin of the set,
+// because the begin element's timestamp is the oldest.
+using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
// A single shard of sharded cache.
class LRUCache {
@@ -386,8 +383,8 @@ private:
CacheValueTimeExtractor _cache_value_time_extractor;
bool _cache_value_check_timestamp = false;
- LRUHandleHeap _sorted_normal_entries_with_timestamp;
- LRUHandleHeap _sorted_durable_entries_with_timestamp;
+ LRUHandleSortedSet _sorted_normal_entries_with_timestamp;
+ LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
uint32_t _element_count_capacity = 0;
};
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
index f0fca74b19..87ebd80664 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp
@@ -296,5 +296,73 @@ TEST_F(InvertedIndexSearcherCacheTest,
evict_by_element_count_limit) {
delete index_searcher_cache;
}
+TEST_F(InvertedIndexSearcherCacheTest, remove_element_only_in_table) {
+ InvertedIndexSearcherCache* index_searcher_cache =
+ new InvertedIndexSearcherCache(kCacheSize, 1);
+ // no need evict
+ std::string file_name_1 = "test_1.idx";
+ InvertedIndexSearcherCache::CacheKey key_1(file_name_1);
+ IndexCacheValuePtr cache_value_1 =
std::make_unique<InvertedIndexSearcherCache::CacheValue>();
+ cache_value_1->size = 200;
+ cache_value_1->index_searcher = nullptr;
+ cache_value_1->last_visit_time = 10;
+ index_searcher_cache->_cache->release(
+ index_searcher_cache->_insert(key_1, cache_value_1.release()));
+
+ std::string file_name_2 = "test_2.idx";
+ InvertedIndexSearcherCache::CacheKey key_2(file_name_2);
+ IndexCacheValuePtr cache_value_2 =
std::make_unique<InvertedIndexSearcherCache::CacheValue>();
+
+ // lookup key_1, insert key_2, release key_2, release key_1
+ {
+ InvertedIndexCacheHandle cache_handle;
+ // lookup key_1
+ EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle));
+
+ // insert key_2, and then evict {key_1, cache_value_1}
+ cache_value_2->size = 800;
+ cache_value_2->index_searcher = nullptr;
+ cache_value_2->last_visit_time = 20;
+ index_searcher_cache->_cache->release(
+ index_searcher_cache->_insert(key_2, cache_value_2.release()));
+
+ // lookup key_2, key_2 has removed from table due to cache is full
+ {
+ InvertedIndexCacheHandle cache_handle;
+ EXPECT_FALSE(index_searcher_cache->_lookup(key_2, &cache_handle));
+ }
+ }
+
+ // lookup key_1 exist
+ {
+ InvertedIndexCacheHandle cache_handle;
+ EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle));
+ }
+
+ // lookup key_2 not exist, then insert into cache, and evict key_1
+ OlapReaderStatistics stats;
+ {
+ InvertedIndexCacheHandle inverted_index_cache_handle;
+ auto status = index_searcher_cache->get_index_searcher(
+ fs, kTestDir, file_name_2, &inverted_index_cache_handle,
&stats);
+ EXPECT_EQ(Status::OK(), status);
+ EXPECT_FALSE(inverted_index_cache_handle.owned);
+ }
+ // lookup key_2 again
+ {
+ InvertedIndexCacheHandle inverted_index_cache_handle;
+ auto status = index_searcher_cache->get_index_searcher(
+ fs, kTestDir, file_name_2, &inverted_index_cache_handle,
&stats);
+ EXPECT_EQ(Status::OK(), status);
+ EXPECT_FALSE(inverted_index_cache_handle.owned);
+ auto cache_value_use_cache =
+
(InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle._cache)
+ ->value(inverted_index_cache_handle._handle);
+ EXPECT_LT(UnixMillis(), cache_value_use_cache->last_visit_time);
+ }
+
+ delete index_searcher_cache;
+}
+
} // namespace segment_v2
} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]