This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 358f0f172 KUDU-613: Cleanup of cache code
358f0f172 is described below
commit 358f0f172609e1a0ea359eb4e8118d0584926b3d
Author: Mahesh Reddy <[email protected]>
AuthorDate: Tue Feb 6 15:04:38 2024 -0500
KUDU-613: Cleanup of cache code
This patch moves some classes out of the
anonymous namespace and into the headers
of the cache and nvm_cache files. These
classes will be used by the new SLRU cache.
This patch also templatizes the HandleTable class
to be used by both the cache and nvm_cache files.
Change-Id: I506d4577c0ae873b01d7fa4f53846d6fd0f664cf
Reviewed-on: http://gerrit.cloudera.org:8080/21018
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/util/cache.cc | 147 +++-------------------------------------
src/kudu/util/cache.h | 151 +++++++++++++++++++++++++++++++++++++----
src/kudu/util/file_cache.cc | 1 +
src/kudu/util/nvm_cache.cc | 162 +++++++-------------------------------------
src/kudu/util/nvm_cache.h | 32 +++++++++
5 files changed, 206 insertions(+), 287 deletions(-)
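To illustrate what the templatized HandleTable enables, here is a minimal, self-contained sketch (not code from this patch): it swaps the real Slice keys and Resize() growth for std::string keys and a fixed bucket count, and HandleTableSketch, DramHandle, and NvmHandle are made-up names standing in for the patch's RLHandle and LRUHandle.

#include <cstdint>
#include <iostream>
#include <string>

// Any handle type exposing 'hash', 'next_hash', and key() can be stored,
// which is what lets the DRAM cache's RLHandle and the NVM cache's
// LRUHandle share one table template in the patch.
struct DramHandle {
  DramHandle* next_hash;
  uint32_t hash;
  std::string key_;
  const std::string& key() const { return key_; }
};

struct NvmHandle {
  NvmHandle* next_hash;
  uint32_t hash;
  std::string key_;
  const std::string& key() const { return key_; }
};

template <typename HandleType>
class HandleTableSketch {
 public:
  HandleTableSketch() : list_(new HandleType*[kLength]()) {}
  ~HandleTableSketch() { delete[] list_; }

  // Chain the handle into its bucket; return any displaced handle with the
  // same key so the caller can release it, mirroring HandleTable::Insert().
  HandleType* Insert(HandleType* h) {
    HandleType** ptr = FindPointer(h->key(), h->hash);
    HandleType* old = *ptr;
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
    *ptr = h;
    return old;
  }

  HandleType* Lookup(const std::string& key, uint32_t hash) {
    return *FindPointer(key, hash);
  }

 private:
  // Walk the bucket's chain until the matching entry (or the tail slot).
  HandleType** FindPointer(const std::string& key, uint32_t hash) {
    HandleType** ptr = &list_[hash & (kLength - 1)];
    while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
      ptr = &(*ptr)->next_hash;
    }
    return ptr;
  }

  static constexpr uint32_t kLength = 16;  // Fixed; the real table Resize()s.
  HandleType** list_;
};

int main() {
  // One template, two unrelated handle types.
  HandleTableSketch<DramHandle> dram_table;
  HandleTableSketch<NvmHandle> nvm_table;
  DramHandle d{nullptr, 42, "block-1"};
  NvmHandle n{nullptr, 42, "block-1"};
  dram_table.Insert(&d);
  nvm_table.Insert(&n);
  std::cout << (dram_table.Lookup("block-1", 42) == &d) << "\n";  // 1
  std::cout << (nvm_table.Lookup("block-1", 42) == &n) << "\n";   // 1
  return 0;
}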
diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc
index d141caca7..c0156fe12 100644
--- a/src/kudu/util/cache.cc
+++ b/src/kudu/util/cache.cc
@@ -45,6 +45,7 @@ DEFINE_double(cache_memtracker_approximation_ratio, 0.01,
"this ratio to improve performance. For tests.");
TAG_FLAG(cache_memtracker_approximation_ratio, hidden);
+using RLHandle = kudu::Cache::RLHandle;
using std::atomic;
using std::shared_ptr;
using std::string;
@@ -68,130 +69,6 @@ const Cache::IterationFunc
Cache::kIterateOverAllEntriesFunc = [](
namespace {
-// Recency list cache implementations (FIFO, LRU, etc.)
-
-// Recency list handle. An entry is a variable length heap-allocated structure.
-// Entries are kept in a circular doubly linked list ordered by some recency
-// criterion (e.g., access time for LRU policy, insertion time for FIFO policy).
-struct RLHandle {
- Cache::EvictionCallback* eviction_callback;
- RLHandle* next_hash;
- RLHandle* next;
- RLHandle* prev;
- size_t charge; // TODO(opt): Only allow uint32_t?
- uint32_t key_length;
- uint32_t val_length;
- std::atomic<int32_t> refs;
- uint32_t hash; // Hash of key(); used for fast sharding and comparisons
-
- // The storage for the key/value pair itself. The data is stored as:
- // [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
- uint8_t kv_data[1]; // Beginning of key/value pair
-
- Slice key() const {
- return Slice(kv_data, key_length);
- }
-
- uint8_t* mutable_val_ptr() {
- int val_offset = KUDU_ALIGN_UP(key_length, sizeof(void*));
- return &kv_data[val_offset];
- }
-
- const uint8_t* val_ptr() const {
- return const_cast<RLHandle*>(this)->mutable_val_ptr();
- }
-
- Slice value() const {
- return Slice(val_ptr(), val_length);
- }
-};
-
-// We provide our own simple hash table since it removes a whole bunch
-// of porting hacks and is also faster than some of the built-in hash
-// table implementations in some of the compiler/runtime combinations
-// we have tested. E.g., readrandom speeds up by ~5% over the g++
-// 4.4.3's builtin hashtable.
-class HandleTable {
- public:
- HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
- ~HandleTable() { delete[] list_; }
-
- RLHandle* Lookup(const Slice& key, uint32_t hash) {
- return *FindPointer(key, hash);
- }
-
- RLHandle* Insert(RLHandle* h) {
- RLHandle** ptr = FindPointer(h->key(), h->hash);
- RLHandle* old = *ptr;
- h->next_hash = (old == nullptr ? nullptr : old->next_hash);
- *ptr = h;
- if (old == nullptr) {
- ++elems_;
- if (elems_ > length_) {
- // Since each cache entry is fairly large, we aim for a small
- // average linked list length (<= 1).
- Resize();
- }
- }
- return old;
- }
-
- RLHandle* Remove(const Slice& key, uint32_t hash) {
- RLHandle** ptr = FindPointer(key, hash);
- RLHandle* result = *ptr;
- if (result != nullptr) {
- *ptr = result->next_hash;
- --elems_;
- }
- return result;
- }
-
- private:
- // The table consists of an array of buckets where each bucket is
- // a linked list of cache entries that hash into the bucket.
- uint32_t length_;
- uint32_t elems_;
- RLHandle** list_;
-
- // Return a pointer to slot that points to a cache entry that
- // matches key/hash. If there is no such cache entry, return a
- // pointer to the trailing slot in the corresponding linked list.
- RLHandle** FindPointer(const Slice& key, uint32_t hash) {
- RLHandle** ptr = &list_[hash & (length_ - 1)];
- while (*ptr != nullptr &&
- ((*ptr)->hash != hash || key != (*ptr)->key())) {
- ptr = &(*ptr)->next_hash;
- }
- return ptr;
- }
-
- void Resize() {
- uint32_t new_length = 16;
- while (new_length < elems_ * 1.5) {
- new_length *= 2;
- }
- auto new_list = new RLHandle*[new_length];
- memset(new_list, 0, sizeof(new_list[0]) * new_length);
- uint32_t count = 0;
- for (uint32_t i = 0; i < length_; i++) {
- RLHandle* h = list_[i];
- while (h != nullptr) {
- RLHandle* next = h->next_hash;
- uint32_t hash = h->hash;
- RLHandle** ptr = &new_list[hash & (new_length - 1)];
- h->next_hash = *ptr;
- *ptr = h;
- h = next;
- count++;
- }
- }
- DCHECK_EQ(elems_, count);
- delete[] list_;
- list_ = new_list;
- length_ = new_length;
- }
-};
-
string ToString(Cache::EvictionPolicy p) {
switch (p) {
case Cache::EvictionPolicy::FIFO:
@@ -212,7 +89,7 @@ class CacheShard {
explicit CacheShard(MemTracker* tracker);
~CacheShard();
- // Separate from constructor so caller can easily make an array of CacheShard
+ // Separate from constructor so caller can easily make an array of CacheShard.
void SetCapacity(size_t capacity) {
capacity_ = capacity;
max_deferred_consumption_ = capacity * FLAGS_cache_memtracker_approximation_ratio;
@@ -270,10 +147,10 @@ class CacheShard {
size_t usage_;
// Dummy head of recency list.
- // rl.prev is newest entry, rl.next is oldest entry.
+ // rl.prev is the newest entry, rl.next is the oldest entry.
RLHandle rl_;
- HandleTable table_;
+ Cache::HandleTable<RLHandle> table_;
MemTracker* mem_tracker_;
atomic<int64_t> deferred_consumption_ { 0 };
@@ -403,7 +280,7 @@ Cache::Handle* CacheShard<policy>::Lookup(const Slice& key,
}
}
- // Do the metrics outside of the lock.
+ // Do the metrics outside the lock.
UpdateMetricsLookup(e != nullptr, caching);
return reinterpret_cast<Cache::Handle*>(e);
@@ -422,8 +299,7 @@ template<Cache::EvictionPolicy policy>
Cache::Handle* CacheShard<policy>::Insert(
RLHandle* handle,
Cache::EvictionCallback* eviction_callback) {
- // Set the remaining RLHandle members which were not already allocated during
- // Allocate().
+ // Set the remaining RLHandle members which were not already allocated during Allocate().
handle->eviction_callback = eviction_callback;
// Two refs for the handle: one from CacheShard, one for the returned handle.
handle->refs.store(2, std::memory_order_relaxed);
@@ -459,8 +335,7 @@ Cache::Handle* CacheShard<policy>::Insert(
}
}
- // we free the entries here outside of mutex for
- // performance reasons
+ // We free the entries here outside the mutex for performance reasons.
while (to_remove_head != nullptr) {
RLHandle* next = to_remove_head->next;
FreeEntry(to_remove_head);
@@ -482,8 +357,8 @@ void CacheShard<policy>::Erase(const Slice& key, uint32_t hash) {
last_reference = Unref(e);
}
}
- // mutex not held here
- // last_reference will only be true if e != NULL
+ // Mutex not held here.
+ // last_reference will only be true if e != NULL.
if (last_reference) {
FreeEntry(e);
}
@@ -524,7 +399,7 @@ size_t CacheShard<policy>::Invalidate(const Cache::InvalidationControl& ctl) {
}
// Once removed from the lookup table and the recency list, the entries
// with no references left must be deallocated because Cache::Release()
- // wont be called for them from elsewhere.
+ // won't be called for them from elsewhere.
while (to_remove_head != nullptr) {
RLHandle* next = to_remove_head->next;
FreeEntry(to_remove_head);
@@ -602,7 +477,7 @@ class ShardedCache : public Cache {
}
UniqueHandle Insert(UniquePendingHandle handle,
- Cache::EvictionCallback* eviction_callback) override {
+ EvictionCallback* eviction_callback) override {
RLHandle* h =
reinterpret_cast<RLHandle*>(DCHECK_NOTNULL(handle.release()));
return UniqueHandle(
shards_[Shard(h->hash)]->Insert(h, eviction_callback),
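The ShardedCache::Insert() hunk above routes each handle with Shard(h->hash). Below is a sketch of the usual take-the-top-bits routing scheme; the shard-bit count and the exact Shard() definition are assumptions for illustration, not values taken from cache.cc.

#include <cstdint>
#include <iostream>

constexpr int kNumShardBits = 4;                // 16 shards: an assumed value.
constexpr int kNumShards = 1 << kNumShardBits;

// Route on the top bits of the hash, so shard selection stays independent
// of the low bits that index buckets inside each shard's HandleTable.
uint32_t Shard(uint32_t hash) {
  return hash >> (32 - kNumShardBits);
}

int main() {
  std::cout << "shards: " << kNumShards << "\n";
  const uint32_t hashes[] = { 0x00000001u, 0x80000001u, 0xffff0000u };
  for (uint32_t h : hashes) {
    std::cout << std::hex << h << " -> shard " << std::dec << Shard(h) << "\n";
  }
  return 0;  // Prints shards 0, 8, and 15 for the three hashes above.
}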
diff --git a/src/kudu/util/cache.h b/src/kudu/util/cache.h
index b72f0b48c..61d664d04 100644
--- a/src/kudu/util/cache.h
+++ b/src/kudu/util/cache.h
@@ -17,15 +17,20 @@
#pragma once
+#include <atomic>
#include <cstddef>
#include <cstdint>
+#include <cstring>
#include <functional>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
+#include <glog/logging.h>
+
#include "kudu/gutil/macros.h"
+#include "kudu/util/alignment.h"
#include "kudu/util/slice.h"
namespace kudu {
@@ -41,8 +46,7 @@ class Cache {
};
// Supported eviction policies for the cache. Eviction policy determines what
- // items to evict if the cache is at capacity when trying to accommodate
- // an extra item.
+ // items to evict if the cache is at capacity when trying to accommodate an extra item.
enum class EvictionPolicy {
// The earliest added items are evicted (a.k.a. queue).
FIFO,
@@ -51,14 +55,138 @@ class Cache {
LRU,
};
- // Callback interface which is called when an entry is evicted from the
- // cache.
+ // Callback interface which is called when an entry is evicted from the cache.
class EvictionCallback {
public:
virtual ~EvictionCallback() = default;
virtual void EvictedEntry(Slice key, Slice value) = 0;
};
+ // Recency list cache implementations (FIFO, LRU, etc.)
+
+ // Recency list handle. An entry is a variable length heap-allocated structure.
+ // Entries are kept in a circular doubly linked list ordered by some recency
+ // criterion (e.g., access time for LRU policy, insertion time for FIFO policy).
+ struct RLHandle {
+ EvictionCallback* eviction_callback;
+ RLHandle* next_hash;
+ RLHandle* next;
+ RLHandle* prev;
+ size_t charge; // TODO(opt): Only allow uint32_t?
+ uint32_t key_length;
+ uint32_t val_length;
+ std::atomic<int32_t> refs;
+ uint32_t hash; // Hash of key(); used for fast sharding and comparisons
+
+ // The storage for the key/value pair itself. The data is stored as:
+ // [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
+ uint8_t kv_data[1]; // Beginning of key/value pair
+
+ Slice key() const {
+ return Slice(kv_data, key_length);
+ }
+
+ uint8_t* mutable_val_ptr() {
+ int val_offset = KUDU_ALIGN_UP(key_length, sizeof(void*));
+ return &kv_data[val_offset];
+ }
+
+ const uint8_t* val_ptr() const {
+ return const_cast<RLHandle*>(this)->mutable_val_ptr();
+ }
+
+ Slice value() const {
+ return Slice(val_ptr(), val_length);
+ }
+ };
+
+ // We provide our own simple hash table since it removes a bunch
+ // of porting hacks and is also faster than some built-in hash
+ // table implementations in some compiler/runtime combinations
+ // we have tested. E.g., readrandom speeds up by ~5% over the g++
+ // 4.4.3's builtin hashtable.
+ template <typename HandleType>
+ class HandleTable {
+ public:
+ HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
+ ~HandleTable() { delete[] list_; }
+
+ HandleType* Lookup(const Slice& key, uint32_t hash) {
+ return *FindPointer(key, hash);
+ }
+
+ HandleType* Insert(HandleType* h) {
+ HandleType** ptr = FindPointer(h->key(), h->hash);
+ HandleType* old = *ptr;
+ h->next_hash = (old == nullptr ? nullptr : old->next_hash);
+ *ptr = h;
+ if (old == nullptr) {
+ ++elems_;
+ if (elems_ > length_) {
+ // Since each cache entry is fairly large, we aim for a small
+ // average linked list length (<= 1).
+ Resize();
+ }
+ }
+ return old;
+ }
+
+ HandleType* Remove(const Slice& key, uint32_t hash) {
+ HandleType** ptr = FindPointer(key, hash);
+ HandleType* result = *ptr;
+ if (result != nullptr) {
+ *ptr = result->next_hash;
+ --elems_;
+ }
+ return result;
+ }
+
+ private:
+ // The table consists of an array of buckets where each bucket is
+ // a linked list of cache entries that hash into the bucket.
+ uint32_t length_;
+ uint32_t elems_;
+ HandleType** list_;
+
+ // Return a pointer to slot that points to a cache entry that
+ // matches key/hash. If there is no such cache entry, return a
+ // pointer to the trailing slot in the corresponding linked list.
+ HandleType** FindPointer(const Slice& key, uint32_t hash) {
+ HandleType** ptr = &list_[hash & (length_ - 1)];
+ while (*ptr != nullptr &&
+ ((*ptr)->hash != hash || key != (*ptr)->key())) {
+ ptr = &(*ptr)->next_hash;
+ }
+ return ptr;
+ }
+
+ void Resize() {
+ uint32_t new_length = 16;
+ while (new_length < elems_ * 1.5) {
+ new_length *= 2;
+ }
+ auto* new_list = new HandleType*[new_length];
+ memset(new_list, 0, sizeof(new_list[0]) * new_length);
+ uint32_t count = 0;
+ for (uint32_t i = 0; i < length_; i++) {
+ HandleType* h = list_[i];
+ while (h != nullptr) {
+ HandleType* next = h->next_hash;
+ uint32_t hash = h->hash;
+ HandleType** ptr = &new_list[hash & (new_length - 1)];
+ h->next_hash = *ptr;
+ *ptr = h;
+ h = next;
+ count++;
+ }
+ }
+ DCHECK_EQ(elems_, count);
+ delete[] list_;
+ list_ = new_list;
+ length_ = new_length;
+ }
+ };
+
Cache() = default;
// Destroys all existing entries by calling the "deleter"
@@ -94,7 +222,7 @@ class Cache {
: c_(c) {
}
- void operator()(Cache::Handle* h) const {
+ void operator()(Handle* h) const {
if (h != nullptr) {
c_->Release(h);
}
@@ -122,7 +250,7 @@ class Cache {
: c_(c) {
}
- void operator()(Cache::PendingHandle* h) const {
+ void operator()(PendingHandle* h) const {
if (h != nullptr) {
c_->Free(h);
}
@@ -141,7 +269,7 @@ class Cache {
typedef std::unique_ptr<PendingHandle, PendingHandleDeleter> UniquePendingHandle;
// Passing EXPECT_IN_CACHE will increment the hit/miss metrics that track the number of times
- // blocks were requested that the users were hoping to get the block from the cache, along with
+ // blocks were requested that the users were hoping to get the block from the cache, along
// with the basic metrics.
// Passing NO_EXPECT_IN_CACHE will only increment the basic metrics.
// This helps in determining if we are effectively caching the blocks that matter the most.
@@ -179,8 +307,7 @@ class Cache {
// to it have been released.
virtual void Erase(const Slice& key) = 0;
- // Return the value encapsulated in a raw handle returned by a successful
- // Lookup().
+ // Return the value encapsulated in a raw handle returned by a successful Lookup().
virtual Slice Value(const UniqueHandle& handle) const = 0;
@@ -188,7 +315,7 @@ class Cache {
// Insertion path
// ------------------------------------------------------------
//
- // Because some cache implementations (eg NVM) manage their own memory, and because we'd
+ // Because some cache implementations (e.g. NVM) manage their own memory, and because we'd
// like to read blocks directly into cache-managed memory rather than causing an extra
// memcpy, the insertion of a new element into the cache requires two phases. First, a
// PendingHandle is allocated with space for the value, and then it is later inserted.
@@ -216,7 +343,7 @@ class Cache {
//
// If 'charge' is not 'kAutomaticCharge', then the cache capacity will be charged
// the explicit amount. This is useful when caching items that are small but need to
- // maintain a bounded count (eg file descriptors) rather than caring about their actual
+ // maintain a bounded count (e.g. file descriptors) rather than caring about their actual
// memory usage. It is also useful when caching items for whom calculating
// memory usage is a complex affair (i.e. items containing pointers to
// additional heap allocations).
@@ -258,7 +385,7 @@ class Cache {
// Invalidate cache's entries, effectively evicting non-valid ones from the
// cache. The invalidation process iterates over the cache's recency list(s),
- // from best candidate for eviction to the worst.
+ // from the best candidate for eviction to the worst.
//
// The provided control structure 'ctl' is responsible for the following:
// * determine whether an entry is valid or not
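The RLHandle now exposed in cache.h packs the key and value inline after the struct, padding the value out to a pointer-size boundary. Here is a worked sketch of that offset arithmetic, with align_up() standing in for the KUDU_ALIGN_UP macro and the key/value sizes chosen arbitrarily for the example.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Stand-in for KUDU_ALIGN_UP: round 'x' up to a multiple of 'align',
// which must be a power of two.
size_t align_up(size_t x, size_t align) {
  return (x + align - 1) & ~(align - 1);
}

int main() {
  // As in RLHandle::mutable_val_ptr(): the value begins at the key length
  // rounded up to sizeof(void*).
  const size_t key_length = 11;   // Arbitrary example key size.
  const size_t val_length = 32;   // Arbitrary example value size.
  const size_t val_offset = align_up(key_length, sizeof(void*));

  // On a 64-bit platform: [11 key bytes][5 padding bytes][32 value bytes].
  std::cout << "value offset: " << val_offset << "\n";                // 16
  std::cout << "kv_data bytes: " << val_offset + val_length << "\n";  // 48
  return 0;
}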
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index a59274b32..61a7beecb 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -25,6 +25,7 @@
#include <mutex>
#include <ostream>
#include <string>
+#include <type_traits>
#include <utility>
#include <vector>
diff --git a/src/kudu/util/nvm_cache.cc b/src/kudu/util/nvm_cache.cc
index 97ea5652f..7784a95ed 100644
--- a/src/kudu/util/nvm_cache.cc
+++ b/src/kudu/util/nvm_cache.cc
@@ -33,10 +33,7 @@
#include <gflags/gflags.h>
#include <glog/logging.h>
-#include "kudu/gutil/atomic_refcount.h"
-#include "kudu/gutil/atomicops.h"
#include "kudu/gutil/bits.h"
-#include "kudu/gutil/dynamic_annotations.h"
#include "kudu/gutil/hash/city.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"
@@ -77,6 +74,7 @@ DEFINE_double(nvm_cache_usage_ratio, 1.25,
"the ratio.");
TAG_FLAG(nvm_cache_usage_ratio, advanced);
+using kudu::util::LRUHandle;
using std::string;
using std::unique_ptr;
using std::vector;
@@ -188,126 +186,13 @@ typedef simple_spinlock MutexType;
// LRU cache implementation
-// An entry is a variable length heap-allocated structure. Entries
-// are kept in a circular doubly linked list ordered by access time.
-struct LRUHandle {
- Cache::EvictionCallback* eviction_callback;
- LRUHandle* next_hash;
- LRUHandle* next;
- LRUHandle* prev;
- size_t charge; // TODO(opt): Only allow uint32_t?
- uint32_t key_length;
- uint32_t val_length;
- Atomic32 refs;
- uint32_t hash; // Hash of key(); used for fast sharding and comparisons
- uint8_t* kv_data;
-
- Slice key() const {
- return Slice(kv_data, key_length);
- }
-
- Slice value() const {
- return Slice(&kv_data[key_length], val_length);
- }
-
- uint8_t* val_ptr() {
- return &kv_data[key_length];
- }
-};
-
-// We provide our own simple hash table since it removes a whole bunch
-// of porting hacks and is also faster than some of the built-in hash
-// table implementations in some of the compiler/runtime combinations
-// we have tested. E.g., readrandom speeds up by ~5% over the g++
-// 4.4.3's builtin hashtable.
-class HandleTable {
- public:
- HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
- ~HandleTable() { delete[] list_; }
-
- LRUHandle* Lookup(const Slice& key, uint32_t hash) {
- return *FindPointer(key, hash);
- }
-
- LRUHandle* Insert(LRUHandle* h) {
- LRUHandle** ptr = FindPointer(h->key(), h->hash);
- LRUHandle* old = *ptr;
- h->next_hash = (old == nullptr ? nullptr : old->next_hash);
- *ptr = h;
- if (old == nullptr) {
- ++elems_;
- if (elems_ > length_) {
- // Since each cache entry is fairly large, we aim for a small
- // average linked list length (<= 1).
- Resize();
- }
- }
- return old;
- }
-
- LRUHandle* Remove(const Slice& key, uint32_t hash) {
- LRUHandle** ptr = FindPointer(key, hash);
- LRUHandle* result = *ptr;
- if (result != nullptr) {
- *ptr = result->next_hash;
- --elems_;
- }
- return result;
- }
-
- private:
- // The table consists of an array of buckets where each bucket is
- // a linked list of cache entries that hash into the bucket.
- uint32_t length_;
- uint32_t elems_;
- LRUHandle** list_;
-
- // Return a pointer to slot that points to a cache entry that
- // matches key/hash. If there is no such cache entry, return a
- // pointer to the trailing slot in the corresponding linked list.
- LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
- LRUHandle** ptr = &list_[hash & (length_ - 1)];
- while (*ptr != nullptr &&
- ((*ptr)->hash != hash || key != (*ptr)->key())) {
- ptr = &(*ptr)->next_hash;
- }
- return ptr;
- }
-
- void Resize() {
- uint32_t new_length = 16;
- while (new_length < elems_ * 1.5) {
- new_length *= 2;
- }
- LRUHandle** new_list = new LRUHandle*[new_length];
- memset(new_list, 0, sizeof(new_list[0]) * new_length);
- uint32_t count = 0;
- for (uint32_t i = 0; i < length_; i++) {
- LRUHandle* h = list_[i];
- while (h != nullptr) {
- LRUHandle* next = h->next_hash;
- uint32_t hash = h->hash;
- LRUHandle** ptr = &new_list[hash & (new_length - 1)];
- h->next_hash = *ptr;
- *ptr = h;
- h = next;
- count++;
- }
- }
- DCHECK_EQ(elems_, count);
- delete[] list_;
- list_ = new_list;
- length_ = new_length;
- }
-};
-
// A single shard of sharded cache.
class NvmLRUCache {
public:
explicit NvmLRUCache(memkind *vmp);
~NvmLRUCache();
- // Separate from constructor so caller can easily make an array of LRUCache
+ // Separate from constructor so caller can easily make an array of LRUCache.
void SetCapacity(size_t capacity) { capacity_ = capacity; }
void SetMetrics(CacheMetrics* metrics) { metrics_ = metrics; }
@@ -325,16 +210,15 @@ class NvmLRUCache {
void NvmLRU_Remove(LRUHandle* e);
void NvmLRU_Append(LRUHandle* e);
// Just reduce the reference count by 1.
- // Return true if last reference
- bool Unref(LRUHandle* e);
+ // Return true if last reference.
+ static bool Unref(LRUHandle* e);
void FreeEntry(LRUHandle* e);
// Evict the LRU item in the cache, adding it to the linked list
// pointed to by 'to_remove_head'.
void EvictOldestUnlocked(LRUHandle** to_remove_head);
- // Free all of the entries in the linked list that has to_free_head
- // as its head.
+ // Free all the entries in the linked list that has to_free_head as its head.
void FreeLRUEntries(LRUHandle* to_free_head);
// Wrapper around memkind_malloc which injects failures based on a flag.
@@ -351,7 +235,7 @@ class NvmLRUCache {
// lru.prev is newest entry, lru.next is oldest entry.
LRUHandle lru_;
- HandleTable table_;
+ Cache::HandleTable<LRUHandle> table_;
memkind* vmp_;
@@ -362,7 +246,7 @@ NvmLRUCache::NvmLRUCache(memkind* vmp)
: usage_(0),
vmp_(vmp),
metrics_(nullptr) {
- // Make empty circular linked list
+ // Make empty circular linked list.
lru_.next = &lru_;
lru_.prev = &lru_;
}
@@ -370,7 +254,8 @@ NvmLRUCache::NvmLRUCache(memkind* vmp)
NvmLRUCache::~NvmLRUCache() {
for (LRUHandle* e = lru_.next; e != &lru_; ) {
LRUHandle* next = e->next;
- DCHECK_EQ(e->refs, 1); // Error if caller has an unreleased handle
+ // Error if caller has an unreleased handle.
+ DCHECK_EQ(e->refs.load(std::memory_order_relaxed), 1);
if (Unref(e)) {
FreeEntry(e);
}
@@ -386,12 +271,12 @@ void* NvmLRUCache::MemkindMalloc(size_t size) {
}
bool NvmLRUCache::Unref(LRUHandle* e) {
- DCHECK_GT(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
- return !base::RefCountDec(&e->refs);
+ DCHECK_GT(e->refs.load(std::memory_order_relaxed), 0);
+ return e->refs.fetch_sub(1) == 1;
}
void NvmLRUCache::FreeEntry(LRUHandle* e) {
- DCHECK_EQ(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
+ DCHECK_EQ(e->refs.load(std::memory_order_relaxed), 0);
if (e->eviction_callback) {
e->eviction_callback->EvictedEntry(e->key(), e->value());
}
@@ -414,7 +299,7 @@ void NvmLRUCache::NvmLRU_Remove(LRUHandle* e) {
}
void NvmLRUCache::NvmLRU_Append(LRUHandle* e) {
- // Make "e" newest entry by inserting just before lru_
+ // Make "e" newest entry by inserting just before lru_.
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
@@ -423,20 +308,20 @@ void NvmLRUCache::NvmLRU_Append(LRUHandle* e) {
}
Cache::Handle* NvmLRUCache::Lookup(const Slice& key, uint32_t hash, bool caching) {
- LRUHandle* e;
+ LRUHandle* e;
{
std::lock_guard<MutexType> l(mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
// If an entry exists, remove the old entry from the cache
// and re-add to the end of the linked list.
- base::RefCountInc(&e->refs);
+ e->refs.fetch_add(1, std::memory_order_relaxed);
NvmLRU_Remove(e);
NvmLRU_Append(e);
}
}
- // Do the metrics outside of the lock.
+ // Do the metrics outside the lock.
if (metrics_) {
metrics_->lookups->Increment();
bool was_hit = (e != nullptr);
@@ -489,7 +374,8 @@ Cache::Handle* NvmLRUCache::Insert(LRUHandle* e,
DCHECK(e);
LRUHandle* to_remove_head = nullptr;
- e->refs = 2; // One from LRUCache, one for the returned handle
+ // One from LRUCache, one for the returned handle.
+ e->refs.store(2, std::memory_order_relaxed);
e->eviction_callback = eviction_callback;
if (PREDICT_TRUE(metrics_)) {
metrics_->cache_usage->IncrementBy(e->charge);
@@ -515,8 +401,7 @@ Cache::Handle* NvmLRUCache::Insert(LRUHandle* e,
}
}
- // we free the entries here outside of mutex for
- // performance reasons
+ // We free the entries here outside the mutex for performance reasons.
FreeLRUEntries(to_remove_head);
return reinterpret_cast<Cache::Handle*>(e);
@@ -533,8 +418,8 @@ void NvmLRUCache::Erase(const Slice& key, uint32_t hash) {
last_reference = Unref(e);
}
}
- // mutex not held here
- // last_reference will only be true if e != nullptr
+ // Mutex not held here.
+ // last_reference will only be true if e != nullptr.
if (last_reference) {
FreeEntry(e);
}
@@ -654,7 +539,7 @@ class ShardedLRUCache : public Cache {
return reinterpret_cast<const LRUHandle*>(handle.get())->value();
}
uint8_t* MutableValue(UniquePendingHandle* handle) override {
- return reinterpret_cast<LRUHandle*>(handle->get())->val_ptr();
+ return reinterpret_cast<LRUHandle*>(handle->get())->mutable_val_ptr();
}
void SetMetrics(unique_ptr<CacheMetrics> metrics,
@@ -674,8 +559,7 @@ class ShardedLRUCache : public Cache {
DCHECK_GE(val_len, 0);
// Try allocating from each of the shards -- if memkind is tight,
- // this can cause eviction, so we might have better luck in different
- // shards.
+ // this can cause eviction, so we might have better luck in different shards.
for (const auto& shard : shards_) {
UniquePendingHandle ph(static_cast<PendingHandle*>(
shard->Allocate(sizeof(LRUHandle) + key_len + val_len)),
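The Unref() change above replaces base::RefCountDec() with a plain std::atomic decrement. A minimal sketch of the same last-reference test, under the same two-reference setup as Insert(): fetch_sub() returns the pre-decrement value, so a return of 1 means the count just hit zero.

#include <atomic>
#include <cstdint>
#include <iostream>

struct Handle {
  std::atomic<int32_t> refs{0};
};

// Same shape as NvmLRUCache::Unref() after this patch: true iff this call
// released the last reference.
bool Unref(Handle* h) {
  return h->refs.fetch_sub(1) == 1;
}

int main() {
  Handle h;
  h.refs.store(2, std::memory_order_relaxed);  // As set up in Insert().
  std::cout << Unref(&h) << "\n";  // 0: another reference is still live.
  std::cout << Unref(&h) << "\n";  // 1: last reference; safe to free now.
  return 0;
}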
diff --git a/src/kudu/util/nvm_cache.h b/src/kudu/util/nvm_cache.h
index 4a302f2e6..d1a89801a 100644
--- a/src/kudu/util/nvm_cache.h
+++ b/src/kudu/util/nvm_cache.h
@@ -16,14 +16,46 @@
// under the License.
#pragma once
+#include <atomic>
#include <cstddef>
+#include <cstdint>
#include <string>
#include <glog/logging.h>
#include "kudu/util/cache.h"
+#include "kudu/util/slice.h"
namespace kudu {
+namespace util {
+
+// An entry is a variable length heap-allocated structure. Entries
+// are kept in a circular doubly linked list ordered by access time.
+struct LRUHandle {
+ Cache::EvictionCallback* eviction_callback;
+ LRUHandle* next_hash;
+ LRUHandle* next;
+ LRUHandle* prev;
+ size_t charge; // TODO(opt): Only allow uint32_t?
+ uint32_t key_length;
+ uint32_t val_length;
+ std::atomic<int32_t> refs;
+ uint32_t hash; // Hash of key(); used for fast sharding and comparisons
+ uint8_t* kv_data;
+
+ Slice key() const {
+ return Slice(kv_data, key_length);
+ }
+
+ Slice value() const {
+ return Slice(&kv_data[key_length], val_length);
+ }
+
+ uint8_t* mutable_val_ptr() const {
+ return &kv_data[key_length];
+ }
+};
+} // namespace util
// Convenience macro for invoking CanUseNVMCacheForTests.
#define RETURN_IF_NO_NVM_CACHE(memory_type) do { \