This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch cs_opt_version-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/cs_opt_version-3.1 by this push:
new eadda13c4e5 cherry pick like opt/lru-k
eadda13c4e5 is described below
commit eadda13c4e54664900bde546418370c0e0fe9b01
Author: HappenLee <[email protected]>
AuthorDate: Wed Jul 9 21:50:03 2025 +0800
cherry pick like opt/lru-k
---
be/src/olap/like_column_predicate.cpp | 21 ++----
be/src/olap/like_column_predicate.h | 65 +++++++++++++------
be/src/olap/lru_cache.cpp | 104 +++++++++++++++++++++++------
be/src/olap/lru_cache.h | 46 ++++++-------
be/src/olap/page_cache.h | 3 +-
be/src/runtime/memory/lru_cache_policy.h | 9 +--
be/src/vec/functions/like.cpp | 13 ++--
be/test/olap/lru_cache_test.cpp | 108 +++++++++++++++++++++++++++++--
be/test/olap/page_cache_test.cpp | 29 ++++++++-
9 files changed, 298 insertions(+), 100 deletions(-)
diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp
index b441e982606..6da2aa3062f 100644
--- a/be/src/olap/like_column_predicate.cpp
+++ b/be/src/olap/like_column_predicate.cpp
@@ -62,15 +62,12 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const vectorized::IColumn& colu
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
auto& data_array = nested_col_ptr->get_data();
+        const auto& dict_res = *_find_code_from_dictionary_column(*nested_col_ptr);
if (!nullable_col->has_null()) {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
-            StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
-            unsigned char flag = 0;
-            static_cast<void>((_state->scalar_function)(
-                    const_cast<vectorized::LikeSearchState*>(&_like_state),
-                    StringRef(cell_value.data, cell_value.size), pattern, &flag));
+ unsigned char flag = dict_res[data_array[idx]];
new_size += _opposite ^ flag;
}
} else {
@@ -81,12 +78,7 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const vectorized::IColumn& colu
new_size += _opposite;
continue;
}
-
-            StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
-            unsigned char flag = 0;
-            static_cast<void>((_state->scalar_function)(
-                    const_cast<vectorized::LikeSearchState*>(&_like_state),
-                    StringRef(cell_value.data, cell_value.size), pattern, &flag));
+ unsigned char flag = dict_res[data_array[idx]];
new_size += _opposite ^ flag;
}
}
@@ -126,15 +118,12 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const vectorized::IColumn& colu
if (column.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
+        const auto& dict_res = *_find_code_from_dictionary_column(*nested_col_ptr);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
-            StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
-            unsigned char flag = 0;
-            static_cast<void>((_state->scalar_function)(
-                    const_cast<vectorized::LikeSearchState*>(&_like_state),
-                    StringRef(cell_value.data, cell_value.size), pattern, &flag));
+ unsigned char flag = dict_res[data_array[idx]];
new_size += _opposite ^ flag;
}
} else {
diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h
index 31763d45f7e..7402d8c9f5a 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -101,6 +101,7 @@ private:
if (nested_col.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
+            const auto& dict_res = *_find_code_from_dictionary_column(*nested_col_ptr);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i < size; i++) {
if (null_map_data[i]) {
@@ -112,18 +113,10 @@ private:
continue;
}
-                StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
+ unsigned char flag = dict_res[data_array[i]];
if constexpr (is_and) {
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] &= _opposite ^ flag;
} else {
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] = _opposite ^ flag;
}
}
@@ -136,19 +129,12 @@ private:
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
+            const auto& dict_res = *_find_code_from_dictionary_column(*nested_col_ptr);
for (uint16_t i = 0; i < size; i++) {
-            StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
+ unsigned char flag = dict_res[data_array[i]];
if constexpr (is_and) {
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] &= _opposite ^ flag;
} else {
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] = _opposite ^ flag;
}
}
@@ -159,6 +145,49 @@ private:
}
}
+    __attribute__((flatten)) const std::vector<bool>* _find_code_from_dictionary_column(
+ const vectorized::ColumnDictI32& column) const {
+        const std::vector<bool>* res = nullptr;
+ if (_segment_id_to_cached_res_flags.if_contains(
+ column.get_rowset_segment_id(),
+ [&res](const auto& pair) { res = &pair.second; })) {
+ return res;
+ }
+
+ std::vector<bool> tmp_res(column.dict_size(), false);
+ for (int i = 0; i < column.dict_size(); i++) {
+ StringRef cell_value = column.get_shrink_value(i);
+ unsigned char flag = 0;
+ static_cast<void>((_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+                    StringRef(cell_value.data, cell_value.size), pattern, &flag));
+ tmp_res[i] = flag;
+ }
+    // Sometimes the dict is not initialized when the comparison predicate runs here;
+    // for example, if the full page is null, the reader skips reading it, so the
+    // dictionary is never initialized. Caching the codes would be wrong in that case,
+    // because a following page may be non-null and the dict may gain items later.
+    //
+    // Since the cached codes can be problematic, a config is added here; if it is
+    // not enabled, we return the codes without caching them.
+        if (!column.is_dict_empty() && config::enable_low_cardinality_cache_code) {
+ _segment_id_to_cached_res_flags.emplace(
+ std::pair {column.get_rowset_segment_id(), tmp_res});
+ }
+
+ _segment_id_to_cached_res_flags.if_contains(
+                column.get_rowset_segment_id(), [&res](const auto& pair) { res = &pair.second; });
+ return res;
+ }
+
+ mutable phmap::parallel_flat_hash_map<
+ std::pair<RowsetId, uint32_t>, std::vector<bool>,
+ phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
+ phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
+            std::allocator<std::pair<const std::pair<RowsetId, uint32_t>, int32_t>>, 4,
+ std::shared_mutex>
+ _segment_id_to_cached_res_flags;
+
std::string _debug_string() const override {
std::string info = "LikeColumnPredicate";
return info;
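
Note on the change above: the per-row calls to the LIKE scalar function are replaced by one evaluation per distinct dictionary code, with the resulting flags cached per (rowset, segment) id; per-row work then reduces to an array lookup. A minimal standalone sketch of the idea, using hypothetical names (DictPredicateCache, flags_for) that are not the Doris API:

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Sketch: evaluate a predicate once per distinct dictionary code and cache
    // the flags per segment, instead of re-evaluating it for every row.
    struct DictPredicateCache {
        using SegmentId = std::pair<std::string, uint32_t>; // (rowset id, segment id)
        std::map<SegmentId, std::vector<bool>> cached_flags;

        const std::vector<bool>& flags_for(const SegmentId& seg,
                                           const std::vector<std::string>& dict,
                                           const std::function<bool(const std::string&)>& pred) {
            auto it = cached_flags.find(seg);
            if (it != cached_flags.end()) return it->second; // cache hit: no re-evaluation
            std::vector<bool> flags(dict.size());
            for (size_t code = 0; code < dict.size(); ++code) {
                flags[code] = pred(dict[code]); // one predicate evaluation per code
            }
            return cached_flags.emplace(seg, std::move(flags)).first->second;
        }
    };

    // Per-row evaluation then degenerates to a table lookup, as in the diff:
    //   flag = dict_res[data_array[idx]];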
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 9bb21ef717d..673a13b5392 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -165,7 +165,7 @@ uint32_t HandleTable::element_count() const {
return _elems;
}
-LRUCache::LRUCache(LRUCacheType type) : _type(type) {
+LRUCache::LRUCache(LRUCacheType type, bool is_lru_k) : _type(type), _is_lru_k(is_lru_k) {
// Make empty circular linked list
_lru_normal.next = &_lru_normal;
_lru_normal.prev = &_lru_normal;
@@ -295,6 +295,17 @@ Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) {
++_hit_count;
e->last_visit_time = UnixMillis();
}
+
+    // If the key does not exist in the cache, this is an LRU-K cache, and the key is in
+    // the visits list, move the key to the beginning of the visits list. A key in the
+    // visits list indicates it has been inserted once since the cache became full.
+ if (e == nullptr && _is_lru_k) {
+ auto it = _visits_lru_cache_map.find(hash);
+ if (it != _visits_lru_cache_map.end()) {
+            _visits_lru_cache_list.splice(_visits_lru_cache_list.begin(), _visits_lru_cache_list,
+ it->second);
+ }
+ }
return reinterpret_cast<Cache::Handle*>(e);
}
@@ -306,10 +317,10 @@ void LRUCache::release(Cache::Handle* handle) {
bool last_ref = false;
{
std::lock_guard l(_mutex);
+        // If last_ref is true, the key may have been evicted from the cache,
+        // or, for LRU-K, the first insert of the key may have failed.
last_ref = _unref(e);
- if (last_ref) {
- _usage -= e->total_size;
- } else if (e->in_cache && e->refs == 1) {
+ if (e->in_cache && e->refs == 1) {
// only exists in cache
if (_usage > _capacity) {
// take this opportunity and remove the item
@@ -317,6 +328,8 @@ void LRUCache::release(Cache::Handle* handle) {
DCHECK(removed);
e->in_cache = false;
_unref(e);
+                // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+                // See the comment for the old entry in `LRUCache::insert`.
_usage -= e->total_size;
last_ref = true;
} else {
@@ -391,6 +404,8 @@ void LRUCache::_evict_one_entry(LRUHandle* e) {
DCHECK(removed);
e->in_cache = false;
_unref(e);
+    // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+    // See the comment for the old entry in `LRUCache::insert`.
_usage -= e->total_size;
}
@@ -398,6 +413,42 @@ bool LRUCache::_check_element_count_limit() {
    return _element_count_capacity != 0 && _table.element_count() >= _element_count_capacity;
}
+// After the cache is full:
+// 1. Return false if the key has been inserted into the visits list before; the key is
+//    allowed into the cache this time (which may trigger cache eviction), and the key
+//    is removed from the visits list.
+// 2. Return true if the key is not in the visits list; insert it into the visits list.
+bool LRUCache::_lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key) {
+ if (_usage + total_size > _capacity ||
+        _check_element_count_limit()) { // no lock required on this line
+ auto it = _visits_lru_cache_map.find(visits_key);
+ if (it != _visits_lru_cache_map.end()) {
+ _visits_lru_cache_usage -= it->second->second;
+ _visits_lru_cache_list.erase(it->second);
+ _visits_lru_cache_map.erase(it);
+ } else {
+            // The _visits_lru_cache_list capacity is the same as the cache itself.
+            // If _visits_lru_cache_list is full, some keys will also be evicted.
+ while (_visits_lru_cache_usage + total_size > _capacity &&
+ _visits_lru_cache_usage != 0) {
+ DCHECK(!_visits_lru_cache_map.empty());
+                _visits_lru_cache_usage -= _visits_lru_cache_list.back().second;
+                _visits_lru_cache_map.erase(_visits_lru_cache_list.back().first);
+ _visits_lru_cache_list.pop_back();
+ }
+            // 1. If true, insert key at the beginning of _visits_lru_cache_list.
+            // 2. If false, it means total_size > cache _capacity, preventing this insert.
+ if (_visits_lru_cache_usage + total_size <= _capacity) {
+ _visits_lru_cache_list.emplace_front(visits_key, total_size);
+                _visits_lru_cache_map[visits_key] = _visits_lru_cache_list.begin();
+ _visits_lru_cache_usage += total_size;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
                                CachePriority priority) {
size_t handle_size = sizeof(LRUHandle) - 1 + key.size();
@@ -409,17 +460,22 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value,
    // because charge at this time is no longer the memory size, but a weight.
    e->total_size = (_type == LRUCacheType::SIZE ? handle_size + charge : charge);
e->hash = hash;
- e->refs = 2; // one for the returned handle, one for LRUCache.
+ e->refs = 1; // only one for the returned handle.
e->next = e->prev = nullptr;
- e->in_cache = true;
+ e->in_cache = false;
e->priority = priority;
e->type = _type;
memcpy(e->key_data, key.data(), key.size());
e->last_visit_time = UnixMillis();
+
LRUHandle* to_remove_head = nullptr;
{
std::lock_guard l(_mutex);
+ if (_is_lru_k && _lru_k_insert_visits_list(e->total_size, hash)) {
+ return reinterpret_cast<Cache::Handle*>(e);
+ }
+
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty
if (_cache_value_check_timestamp) {
@@ -431,12 +487,21 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value,
// insert into the cache
// note that the cache might get larger than its capacity if not enough
// space was freed
- auto old = _table.insert(e);
+ auto* old = _table.insert(e);
+ e->in_cache = true;
_usage += e->total_size;
+ e->refs++; // one for the returned handle, one for LRUCache.
if (old != nullptr) {
old->in_cache = false;
+        // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+        // Regardless of whether the old entry's reference count is 0, the cache usage is subtracted here,
+        // because the old entry has been removed from the cache and should not be counted in the cache capacity,
+        // but the memory of the old entry is still tracked by the cache memory_tracker.
+        // After all the old handles are released, the old entry will be freed and its memory
+        // will be released from the cache memory_tracker.
+        _usage -= old->total_size;
+        // If _unref returns false, the old entry is still in use externally; just ref-- and subtract _usage.
if (_unref(old)) {
- _usage -= old->total_size;
// old is on LRU because it's in cache and its reference count
// was just 1 (Unref returned 0)
_lru_remove(old);
@@ -465,14 +530,15 @@ void LRUCache::erase(const CacheKey& key, uint32_t hash) {
e = _table.remove(key, hash);
if (e != nullptr) {
last_ref = _unref(e);
- if (last_ref) {
- _usage -= e->total_size;
- if (e->in_cache) {
- // locate in free list
- _lru_remove(e);
- }
+        // If last_ref is false or in_cache is false, e cannot be in the LRU list.
+ if (last_ref && e->in_cache) {
+ // locate in free list
+ _lru_remove(e);
}
e->in_cache = false;
+        // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+        // See the comment for the old entry in `LRUCache::insert`.
+ _usage -= e->total_size;
}
}
// free handle out of mutex, when last_ref is true, e must not be nullptr
@@ -565,7 +631,8 @@ inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) {
}
ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
-                                 uint32_t num_shards, uint32_t total_element_count_capacity)
+                                 uint32_t num_shards, uint32_t total_element_count_capacity,
+                                 bool is_lru_k)
: _name(name),
_num_shard_bits(Bits::FindLSBSetNonZero(num_shards)),
_num_shards(num_shards),
@@ -581,7 +648,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCa
(total_element_count_capacity + (_num_shards - 1)) / _num_shards;
LRUCache** shards = new (std::nothrow) LRUCache*[_num_shards];
for (int s = 0; s < _num_shards; s++) {
- shards[s] = new LRUCache(type);
+ shards[s] = new LRUCache(type, is_lru_k);
shards[s]->set_capacity(per_shard);
shards[s]->set_element_count_capacity(per_shard_element_count_capacity);
}
@@ -610,8 +677,9 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCa
                                 uint32_t num_shards,
                                 CacheValueTimeExtractor cache_value_time_extractor,
                                 bool cache_value_check_timestamp,
-                                 uint32_t total_element_count_capacity)
-    : ShardedLRUCache(name, capacity, type, num_shards, total_element_count_capacity) {
+                                 uint32_t total_element_count_capacity, bool is_lru_k)
+    : ShardedLRUCache(name, capacity, type, num_shards, total_element_count_capacity,
+                      is_lru_k) {
for (int s = 0; s < _num_shards; s++) {
_shards[s]->set_cache_value_time_extractor(cache_value_time_extractor);
_shards[s]->set_cache_value_check_timestamp(cache_value_check_timestamp);
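
The visits list added above implements LRU-K admission with K=2: once the cache is full, the first insert of a key only records a visit, and the key is admitted on its second insert; the visits list itself is LRU-bounded by the same capacity. A compact standalone sketch of that admission logic, mirroring _lru_k_insert_visits_list but with simplified, illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <list>
    #include <unordered_map>
    #include <utility>

    // Sketch of LRU-2 admission: returns true if the insert should be deferred
    // (a visit was recorded), false if the key may enter the cache now.
    class Lru2Admission {
    public:
        explicit Lru2Admission(size_t capacity) : _capacity(capacity) {}

        bool defer_insert(uint32_t key_hash, size_t size, size_t cache_usage) {
            if (cache_usage + size <= _capacity) return false; // cache not full yet
            auto it = _map.find(key_hash);
            if (it != _map.end()) { // second visit: admit the key, drop the record
                _usage -= it->second->second;
                _list.erase(it->second);
                _map.erase(it);
                return false;
            }
            // First visit: evict the coldest visit records until this one fits.
            while (_usage + size > _capacity && _usage != 0) {
                _usage -= _list.back().second;
                _map.erase(_list.back().first);
                _list.pop_back();
            }
            if (_usage + size <= _capacity) { // skipped if size alone exceeds capacity
                _list.emplace_front(key_hash, size);
                _map[key_hash] = _list.begin();
                _usage += size;
            }
            return true; // defer; the caller hands back an uncached handle
        }

    private:
        size_t _capacity;
        size_t _usage = 0;
        std::list<std::pair<uint32_t, size_t>> _list; // most recent visit at front
        std::unordered_map<uint32_t, std::list<std::pair<uint32_t, size_t>>::iterator> _map;
    };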
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index c285555d916..0b7da8754ff 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -26,30 +26,6 @@
namespace doris {
-#define OLAP_CACHE_STRING_TO_BUF(cur, str, r_len) \
- do { \
- if (r_len > str.size()) { \
- memcpy(cur, str.c_str(), str.size()); \
- r_len -= str.size(); \
- cur += str.size(); \
- } else { \
- LOG(WARNING) << "construct cache key buf not enough."; \
- return CacheKey(nullptr, 0); \
- } \
- } while (0)
-
-#define OLAP_CACHE_NUMERIC_TO_BUF(cur, numeric, r_len) \
- do { \
- if (r_len > sizeof(numeric)) { \
- memcpy(cur, &numeric, sizeof(numeric)); \
- r_len -= sizeof(numeric); \
- cur += sizeof(numeric); \
- } else { \
- LOG(WARNING) << "construct cache key buf not enough."; \
- return CacheKey(nullptr, 0); \
- } \
- } while (0)
-
class Cache;
class LRUCachePolicy;
struct LRUHandle;
@@ -62,6 +38,7 @@ enum LRUCacheType {
static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
+static constexpr bool DEFAULT_LRU_CACHE_IS_LRU_K = false;
class CacheKey {
public:
@@ -180,6 +157,10 @@ public:
//
// When the inserted entry is no longer needed, the key and
// value will be passed to "deleter".
+    //
+    // If the cache is LRU-K and the cache is full, the first insert of a key will not succeed.
+    //
+    // Note: for a ShardedLRUCache, each shard's capacity = ShardedLRUCache capacity / num_shards.
virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
CachePriority priority = CachePriority::NORMAL) = 0;
@@ -326,9 +307,12 @@ using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
// A single shard of sharded cache.
class LRUCache {
public:
- LRUCache(LRUCacheType type);
+ LRUCache(LRUCacheType type, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K);
~LRUCache();
+ using visits_lru_cache_key = uint32_t;
+ using visits_lru_cache_pair = std::pair<visits_lru_cache_key, size_t>;
+
// Separate from constructor so caller can easily make an array of LRUCache
PrunedInfo set_capacity(size_t capacity);
void set_element_count_capacity(uint32_t element_count_capacity) {
@@ -362,6 +346,7 @@ private:
    void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head);
void _evict_one_entry(LRUHandle* e);
bool _check_element_count_limit();
+    bool _lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key);
private:
LRUCacheType _type;
@@ -391,6 +376,12 @@ private:
LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
uint32_t _element_count_capacity = 0;
+
+ bool _is_lru_k = false; // LRU-K algorithm, K=2
+ std::list<visits_lru_cache_pair> _visits_lru_cache_list;
+    std::unordered_map<visits_lru_cache_key, std::list<visits_lru_cache_pair>::iterator>
+ _visits_lru_cache_map;
+ size_t _visits_lru_cache_usage = 0;
};
class ShardedLRUCache : public Cache {
@@ -415,11 +406,12 @@ private:
friend class LRUCachePolicy;
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
-                             uint32_t num_shards, uint32_t element_count_capacity);
+                             uint32_t num_shards, uint32_t element_count_capacity, bool is_lru_k);
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
                             uint32_t num_shards,
                             CacheValueTimeExtractor cache_value_time_extractor,
-                             bool cache_value_check_timestamp, uint32_t element_count_capacity);
+                             bool cache_value_check_timestamp, uint32_t element_count_capacity,
+                             bool is_lru_k);
void update_cache_metrics() const;
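
One caller-facing consequence of the insert contract documented above: with is_lru_k enabled and a full cache, the first insert returns a perfectly usable handle whose entry is not in the cache, so an immediate lookup misses. A hedged usage sketch against the Cache interface in this header (deleter and error handling omitted; illustrative, not the canonical call sequence):

    // Cache::Handle* h = cache->insert(key, value, charge);  // first insert on a full
    //                                                        // LRU-K cache: only a visit
    //                                                        // is recorded
    // Cache::Handle* probe = cache->lookup(key);             // miss: entry not in cache
    // cache->release(h);                                     // last ref frees the entry
    //
    // h = cache->insert(key, value, charge);                 // second insert: admitted,
    //                                                        // possibly evicting colder keys
    // probe = cache->lookup(key);                            // now hits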
diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h
index 32b6683e782..2b123e83c1e 100644
--- a/be/src/olap/page_cache.h
+++ b/be/src/olap/page_cache.h
@@ -97,7 +97,8 @@ public:
    DataPageCache(size_t capacity, uint32_t num_shards)
            : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
                             LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
-                             num_shards) {}
+                             num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
+    }
};
class IndexPageCache : public LRUCachePolicy {
diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h
index 83c7f46585a..63b2abe2118 100644
--- a/be/src/runtime/memory/lru_cache_policy.h
+++ b/be/src/runtime/memory/lru_cache_policy.h
@@ -36,13 +36,13 @@ public:
    LRUCachePolicy(CacheType type, size_t capacity, LRUCacheType lru_cache_type,
                   uint32_t stale_sweep_time_s, uint32_t num_shards = DEFAULT_LRU_CACHE_NUM_SHARDS,
                   uint32_t element_count_capacity = DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY,
-                   bool enable_prune = true)
+                   bool enable_prune = true, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K)
: CachePolicy(type, capacity, stale_sweep_time_s, enable_prune),
_lru_cache_type(lru_cache_type) {
if (check_capacity(capacity, num_shards)) {
            _cache = std::shared_ptr<ShardedLRUCache>(
                    new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards,
-                                        element_count_capacity));
+                                        element_count_capacity, is_lru_k));
} else {
CHECK(ExecEnv::GetInstance()->get_dummy_lru_cache());
_cache = ExecEnv::GetInstance()->get_dummy_lru_cache();
@@ -55,14 +55,15 @@ public:
uint32_t stale_sweep_time_s, uint32_t num_shards,
uint32_t element_count_capacity,
CacheValueTimeExtractor cache_value_time_extractor,
- bool cache_value_check_timestamp, bool enable_prune = true)
+ bool cache_value_check_timestamp, bool enable_prune = true,
+ bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K)
: CachePolicy(type, capacity, stale_sweep_time_s, enable_prune),
_lru_cache_type(lru_cache_type) {
if (check_capacity(capacity, num_shards)) {
            _cache = std::shared_ptr<ShardedLRUCache>(
                    new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards,
                                        cache_value_time_extractor, cache_value_check_timestamp,
-                                        element_count_capacity));
+                                        element_count_capacity, is_lru_k));
} else {
CHECK(ExecEnv::GetInstance()->get_dummy_lru_cache());
_cache = ExecEnv::GetInstance()->get_dummy_lru_cache();
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index 4ed14280e4c..f3aa71d03a0 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -509,11 +509,12 @@ Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block,
size_t input_rows_count) const {
const auto values_col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- const auto* values = check_and_get_column<ColumnString>(values_col.get());
+ const auto* values =
+            assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(values_col.get());
- if (!values) {
- return Status::InternalError("Not supported input arguments types");
- }
+ // if (!values) {
+    //     return Status::InternalError("Not supported input arguments types");
+ // }
// result column
auto res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = res->get_data();
@@ -578,9 +579,7 @@ Status FunctionLikeBase::execute_substring(const ColumnString::Chars& values,
}
        /// We check that the entry does not pass through the boundaries of strings.
- if (pos + needle_size <= begin + value_offsets[i]) {
- result[i] = 1;
- }
+ result[i] = pos + needle_size <= begin + value_offsets[i];
// move to next string offset
pos = begin + value_offsets[i];
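
The execute_substring change above swaps a conditional store for a branchless boolean assignment. The two forms agree because the result buffer starts zeroed and each row's flag is written at most once in this loop; under that assumption, a standalone illustration:

    #include <cstddef>
    #include <cstdint>

    // Branchy and branchless forms of the boundary check, equivalent when
    // result[i] starts at 0 and is written at most once per row.
    inline void set_flag_branchy(uint8_t* result, size_t i, bool within_bounds) {
        if (within_bounds) result[i] = 1;
    }

    inline void set_flag_branchless(uint8_t* result, size_t i, bool within_bounds) {
        result[i] = within_bounds; // same value, no branch; friendlier to the optimizer
    }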
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index 8c260d69755..64fe925f15f 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -24,6 +24,7 @@
#include <iosfwd>
#include <vector>
+#include "gtest/gtest.h"
#include "gtest/gtest_pred_impl.h"
#include "runtime/memory/lru_cache_policy.h"
#include "runtime/memory/lru_cache_value_base.h"
@@ -105,6 +106,9 @@ public:
    // there are 16 shards in ShardedLRUCache
    // And the LRUHandle size is about 100B. So the cache size should be big enough
    // to run the UT.
+    // kCacheSize needs to be an even number. If it is odd, the cache will still behave
+    // correctly, but the UT will fail because the check against (capacity / 2) fails.
+    // In fact, the Cache will waste one entry's space.
static const int kCacheSize = 1000 * 16;
std::vector<int> _deleted_keys;
std::vector<int> _deleted_values;
@@ -325,7 +329,74 @@ TEST_F(CacheTest, Usage) {
CacheKey key7("950");
insert_LRUCache(cache, key7, 950, CachePriority::DURABLE);
-    ASSERT_EQ(0, cache.get_usage()); // evict 298 698, because 950 + 98 > 1040, so insert failed
+    ASSERT_EQ(
+            0,
+            cache.get_usage()); // evict 298 698, because 950 + 98 > 1040; data was freed when the handle was released.
+
+ CacheKey key8("900");
+ insert_LRUCache(cache, key8, 900, CachePriority::NORMAL);
+ ASSERT_EQ(998, cache.get_usage()); // 900 + 98 < 1050
+}
+
+TEST_F(CacheTest, UsageLRUK) {
+ LRUCache cache(LRUCacheType::SIZE, true);
+ cache.set_capacity(1050);
+
+ // The lru usage is handle_size + charge.
+ // handle_size = sizeof(handle) - 1 + key size = 96 - 1 + 3 = 98
+ CacheKey key1("100");
+ insert_LRUCache(cache, key1, 100, CachePriority::NORMAL);
+ ASSERT_EQ(198, cache.get_usage()); // 100 + 98
+
+ CacheKey key2("200");
+ insert_LRUCache(cache, key2, 200, CachePriority::DURABLE);
+ ASSERT_EQ(496, cache.get_usage()); // 198 + 298(d), d = DURABLE
+
+ CacheKey key3("300");
+ insert_LRUCache(cache, key3, 300, CachePriority::NORMAL);
+ ASSERT_EQ(894, cache.get_usage()); // 198 + 298(d) + 398
+
+ CacheKey key4("400");
+ insert_LRUCache(cache, key4, 400, CachePriority::NORMAL);
+    // Data cache is full, so no insert; visits lru cache does not contain key=498 (400 + 98), so insert it there.
+ ASSERT_EQ(894, cache.get_usage());
+
+ insert_LRUCache(cache, key4, 400, CachePriority::NORMAL);
+    // Data cache 298(d) + 498, evict 198 398. visits lru cache contains key=498,
+    // so erase it from the visits lru cache and insert into the Data cache.
+ ASSERT_EQ(796, cache.get_usage());
+
+ CacheKey key5("500");
+ insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache is full, so no insert; visits lru cache does not contain key=598 (500 + 98), so insert it there.
+ ASSERT_EQ(796, cache.get_usage());
+
+ CacheKey key6("600");
+ insert_LRUCache(cache, key6, 600, CachePriority::NORMAL);
+    // Data cache is full, so no insert; visits lru cache does not contain key=698 (600 + 98), so insert it there.
+    // visits lru cache is full, so evict key=598 from the visits lru cache.
+ ASSERT_EQ(796, cache.get_usage());
+
+ insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache is full, so no insert; visits lru cache does not contain key=598, so insert it there.
+    // visits lru cache is full, so evict key=698 from the visits lru cache.
+ ASSERT_EQ(796, cache.get_usage());
+
+ insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache 298(d) + 598, evict 498. visits lru cache contains key=598,
+    // so erase it from the visits lru cache and insert into the Data cache.
+ ASSERT_EQ(896, cache.get_usage());
+
+ CacheKey key7("980");
+ insert_LRUCache(cache, key7, 980, CachePriority::DURABLE);
+    // Data cache is full, so no insert; visits lru cache does not contain key=1078 (980 + 98),
+    // but 1078 > capacity(1050), so it is not inserted into the visits lru cache either.
+ ASSERT_EQ(896, cache.get_usage());
+
+ insert_LRUCache(cache, key7, 980, CachePriority::DURABLE);
+    // Same as above: data cache is full, so no insert; visits lru cache does not contain key=1078 (980 + 98),
+    // but 1078 > capacity(1050), so it is not inserted into the visits lru cache either.
+ ASSERT_EQ(896, cache.get_usage());
}
TEST_F(CacheTest, Prune) {
@@ -661,26 +732,51 @@ TEST_F(CacheTest, SetCapacity) {
            cache()->get_usage()); // Handles not released, so keys cannot be evicted.
for (int i = 0; i < kCacheSize; i++) {
+        // The Key exists in the Cache: remove the old Entry from the Cache and insert it again.
Insert(i + kCacheSize, 2000 + i, 1);
- EXPECT_EQ(-1, Lookup(i + kCacheSize)); // Cache is full, insert failed.
+ if (i < kCacheSize / 2) {
+            // Insert() will replace the entry with the same key in the cache; the replaced
+            // entry will not be freed because unreleased handles still hold it.
+            // The current cache capacity (kCacheSize/2) is half of the cache usage (kCacheSize),
+            // and Insert() immediately releases the handle of the newly inserted entry,
+            // so the newly inserted entry will be freed, until cache usage is less than or
+            // equal to capacity.
+ ASSERT_GE(cache()->get_usage(), cache()->get_capacity());
+ EXPECT_EQ(-1, Lookup(i + kCacheSize));
+ } else if (i == kCacheSize / 2) {
+            // When cache usage is equal to cache capacity, Insert() will replace the old entry
+            // with the same key and will not free the entry after releasing the handle.
+ ASSERT_EQ(cache()->get_usage(), cache()->get_capacity());
+ EXPECT_EQ(2000 + i, Lookup(i + kCacheSize));
+ } else {
+            // When inserting at `i == kCacheSize / 2 + 1`, the cache usage is equal to the
+            // cache capacity, so the entry in the LRU list will be evicted (usage - 1) and
+            // then inserted (usage + 1). Because the inserted entry has an existing key,
+            // the old entry with the same key is evicted (usage - 1),
+            // so the final cache usage is equal to (capacity - 1).
+ ASSERT_EQ(cache()->get_usage(), cache()->get_capacity() - 1);
+ EXPECT_EQ(2000 + i, Lookup(i + kCacheSize));
+ }
}
ASSERT_EQ(kCacheSize / 2, cache()->get_capacity());
- ASSERT_EQ(kCacheSize, cache()->get_usage());
+    // Here cache usage equals cache capacity - 1, because the entry inserted in the previous
+    // step at `i == kCacheSize / 2 + 1` was evicted; see the reason above.
+    // Entries held by unreleased handles in `handles` will not be counted in cache usage,
+    // but will still be counted in the memory tracker.
+ ASSERT_EQ(kCacheSize / 2 - 1, cache()->get_usage());
cache()->adjust_capacity_weighted(2);
ASSERT_EQ(kCacheSize * 2, cache()->get_capacity());
- ASSERT_EQ(kCacheSize, cache()->get_usage());
+ ASSERT_EQ(kCacheSize / 2 - 1, cache()->get_usage());
for (int i = 0; i < kCacheSize; i++) {
Insert(i, 3000 + i, 1);
EXPECT_EQ(3000 + i, Lookup(i));
}
ASSERT_EQ(kCacheSize * 2, cache()->get_capacity());
- ASSERT_EQ(kCacheSize * 2, cache()->get_usage());
+ ASSERT_EQ(kCacheSize * 1.5 - 1, cache()->get_usage());
cache()->adjust_capacity_weighted(0);
ASSERT_EQ(0, cache()->get_capacity());
- ASSERT_EQ(kCacheSize, cache()->get_usage());
+ ASSERT_EQ(0, cache()->get_usage());
for (auto it : handles) {
cache()->release(it);
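
All the usage numbers asserted in these tests follow one formula: for LRUCacheType::SIZE, an entry accounts for handle_size + charge, where handle_size = sizeof(LRUHandle) - 1 + key.size() (96 - 1 + 3 = 98 for the 3-byte keys used here). A small illustrative helper, assuming sizeof(LRUHandle) == 96 as stated in the test comments:

    #include <cstddef>
    #include <string>

    // Mirrors the accounting described in the test comments; 96 is an
    // assumption taken from those comments, not a portable constant.
    constexpr size_t kAssumedLRUHandleSize = 96;

    inline size_t expected_usage_for(const std::string& key, size_t charge) {
        size_t handle_size = kAssumedLRUHandleSize - 1 + key.size(); // 96 - 1 + 3 = 98
        return handle_size + charge; // LRUCacheType::SIZE: handle + charge
    }

    // e.g. expected_usage_for("100", 100) == 198, matching the first assertion
    // in TEST_F(CacheTest, UsageLRUK).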
diff --git a/be/test/olap/page_cache_test.cpp b/be/test/olap/page_cache_test.cpp
index 1feb6152add..a6b9300c105 100644
--- a/be/test/olap/page_cache_test.cpp
+++ b/be/test/olap/page_cache_test.cpp
@@ -71,7 +71,8 @@ TEST_F(StoragePageCacheTest, data_page_only) {
EXPECT_TRUE(found);
}
- // put too many page to eliminate first page
+ // Page Cache is LRU-K, K=2.
+    // Put in too many pages; after the cache is full, no key is inserted twice, so no eviction occurs.
for (int i = 0; i < 10 * kNumShards; ++i) {
StoragePageCache::CacheKey key("bcd", 0, i);
PageCacheHandle handle;
@@ -95,6 +96,27 @@ TEST_F(StoragePageCacheTest, data_page_only) {
EXPECT_FALSE(found);
}
+    // After the cache is full, no key is inserted twice, so no eviction occurs.
+ {
+ PageCacheHandle handle;
+ auto found = cache.lookup(key, &handle, page_type);
+ EXPECT_TRUE(found);
+ }
+
+    // Put in too many pages, each inserted twice, to eliminate the first page.
+ for (int i = 0; i < 10 * kNumShards; ++i) {
+ StoragePageCache::CacheKey key("bcde", 0, i);
+ PageCacheHandle handle;
+ auto* data = new DataPage(1024, true, page_type);
+ cache.insert(key, data, &handle, page_type, false);
+        auto found = cache.lookup(key, &handle, page_type); // after the handle destructs, the data is freed.
+ EXPECT_FALSE(found);
+ data = new DataPage(1024, true, page_type);
+ cache.insert(key, data, &handle, page_type, false);
+ found = cache.lookup(key, &handle, page_type);
+ EXPECT_TRUE(found);
+ }
+
// cache miss for eliminated key
{
PageCacheHandle handle;
@@ -253,12 +275,13 @@ TEST_F(StoragePageCacheTest, mixed_pages) {
EXPECT_FALSE(found_index);
}
- // cache miss for eliminated key
{
PageCacheHandle data_handle, index_handle;
auto found_data = cache.lookup(data_key, &data_handle, page_type_data);
        auto found_index = cache.lookup(index_key, &index_handle, page_type_index);
- EXPECT_FALSE(found_data);
+        // after the cache is full, no key is inserted twice, so no eviction occurs
+ EXPECT_TRUE(found_data);
+ // cache miss for eliminated key
EXPECT_FALSE(found_index);
}
}
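
The page-cache expectations above also lean on the lookup-side behavior added in LRUCache::lookup: a miss on an LRU-K cache refreshes the key's position in the visits list, so a following re-insert still counts as the second visit. A sketch of that path with simplified names (the real code splices _visits_lru_cache_list under _mutex):

    #include <cstddef>
    #include <cstdint>
    #include <list>
    #include <unordered_map>
    #include <utility>

    using VisitsList = std::list<std::pair<uint32_t, size_t>>;
    using VisitsMap = std::unordered_map<uint32_t, VisitsList::iterator>;

    // On a cache miss, move the key's visit record to the MRU position so the
    // record survives until a second insert admits the key.
    inline void touch_visit_on_miss(uint32_t hash, VisitsList& list, VisitsMap& map) {
        auto it = map.find(hash);
        if (it != map.end()) {
            list.splice(list.begin(), list, it->second); // relink node; iterators stay valid
        }
    }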
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]