This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new b8b080651 KUDU-613: SLRU Cache Benchmark
b8b080651 is described below
commit b8b080651e528080b8b4b5b58c6f1021b2dd01da
Author: Mahesh Reddy <[email protected]>
AuthorDate: Thu Jun 13 16:01:01 2024 -0400
KUDU-613: SLRU Cache Benchmark
This patch introduces a new benchmark that
validates the performance of the SLRU cache.
The pattern this benchmark follows is frequent lookups
for a small set of keys followed by lookups for rarely
accessed keys with large values that would normally
evict the frequently accessed keys from the LRU
cache. As the results below show, SLRU cache
performs better with this workload pattern.
Ran benchmarks for RELEASE build locally on macOS.
6 cores and 2.6GHz.
Results:
Test case | Algorithm | Lookups/sec | Hit rate
ZIPFIAN ratio=1.00x | LRU | 11.43M | 99.8%
ZIPFIAN ratio=1.00x | SLRU | 10.68M | 98.6%
ZIPFIAN ratio=3.00x | LRU | 11.43M | 95.8%
ZIPFIAN ratio=3.00x | SLRU | 10.07M | 93.4%
UNIFORM ratio=1.00x | LRU | 9.31M | 99.7%
UNIFORM ratio=1.00x | SLRU | 7.51M | 99.7%
UNIFORM ratio=3.00x | LRU | 6.80M | 33.3%
UNIFORM ratio=3.00x | SLRU | 5.09M | 11.0%
PRE_DETERMINED ratio=1.00x | LRU | 17.73M | 93.1%
PRE_DETERMINED ratio=1.00x | SLRU | 18.58M | 99.2%
PRE_DETERMINED ratio=3.00x | LRU | 17.07M | 93.1%
PRE_DETERMINED ratio=3.00x | SLRU | 19.12M | 99.1%
Change-Id: I1c128a9f047497373ce3e740056eaa89a352261b
Reviewed-on: http://gerrit.cloudera.org:8080/21601
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Alexey Serbin <[email protected]>
---
src/kudu/util/cache-bench.cc | 84 +++++++++++++++++++++++++++++++++-----------
1 file changed, 63 insertions(+), 21 deletions(-)
diff --git a/src/kudu/util/cache-bench.cc b/src/kudu/util/cache-bench.cc
index 5c11689d9..813c54aba 100644
--- a/src/kudu/util/cache-bench.cc
+++ b/src/kudu/util/cache-bench.cc
@@ -55,9 +55,10 @@ namespace kudu {
// Benchmark a 1GB cache.
static constexpr int kCacheCapacity = 1024 * 1024 * 1024;
-static constexpr int kProbationarySegmentCapacity = 820 * 1024 * 1024;
-static constexpr int kProtectedSegmentCapacity = 204 * 1024 * 1024;
-static constexpr uint32_t kLookups = 3;
+static constexpr int kProbationarySegmentCapacity = 204 * 1024 * 1024;
+static constexpr int kProtectedSegmentCapacity = kCacheCapacity -
kProbationarySegmentCapacity;
+static constexpr uint32_t kLookups = 2;
+static constexpr uint16_t kMaxMultiplier = 256;
// Use 4kb entries.
static constexpr int kEntrySize = 4 * 1024;
@@ -69,10 +70,23 @@ struct BenchSetup {
// vast majority of lookups.
ZIPFIAN,
// Every item is equally likely to be looked up.
- UNIFORM
+ UNIFORM,
+ // A small number of pre-determined items with small values are frequently
looked up
+ // while random items with large values are looked up less frequently.
+ PRE_DETERMINED_FREQUENT_LOOKUPS
};
Pattern pattern;
+ string ToString(Pattern pattern) const {
+ switch (pattern) {
+ case Pattern::ZIPFIAN: return "ZIPFIAN";
+ case Pattern::UNIFORM: return "UNIFORM";
+ case Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS: return
"PRE_DETERMINED_FREQUENT_LOOKUPS";
+ default: LOG(FATAL) << "unexpected benchmark pattern: " <<
static_cast<int>(pattern); break;
+ }
+ return "unknown benchmark pattern";
+ }
+
// The ratio between the size of the dataset and the cache.
//
// A value smaller than 1 will ensure that the whole dataset fits
@@ -83,10 +97,7 @@ struct BenchSetup {
string ToString() const {
string ret;
- switch (pattern) {
- case Pattern::ZIPFIAN: ret += "ZIPFIAN"; break;
- case Pattern::UNIFORM: ret += "UNIFORM"; break;
- }
+ ret += ToString(pattern);
if (eviction_policy == Cache::EvictionPolicy::SLRU) {
ret += " SLRU";
} else {
@@ -122,18 +133,34 @@ class CacheBench : public KuduTest,
}
// Run queries against the cache until '*done' becomes true.
+ // If 'frequent' is true, the workload is a small set of keys with small
values.
+ // If 'frequent' is false, the workload is a large set of keys with large
values.
// Returns a pair of the number of cache hits and lookups.
- pair<int64_t, int64_t> DoQueries(const atomic<bool>* done) {
+ pair<int64_t, int64_t> DoQueries(const atomic<bool>* done, bool frequent,
uint32_t large_number) {
const BenchSetup& setup = GetParam();
Random r(GetRandomSeed32());
int64_t lookups = 0;
int64_t hits = 0;
while (!*done) {
uint32_t int_key;
- if (setup.pattern == BenchSetup::Pattern::ZIPFIAN) {
- int_key = r.Skewed(Bits::Log2Floor(setup.max_key()));
- } else {
- int_key = r.Uniform(setup.max_key());
+ switch (setup.pattern) {
+ case BenchSetup::Pattern::ZIPFIAN:
+ int_key = r.Skewed(Bits::Log2Floor(setup.max_key()));
+ break;
+ case BenchSetup::Pattern::UNIFORM:
+ int_key = r.Uniform(setup.max_key());
+ break;
+ case BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS:
+ if (frequent) {
+ auto small_multiplier = r.Uniform(kMaxMultiplier);
+ int_key = large_number * small_multiplier;
+ } else {
+ // Rare random key with big value.
+ int_key = r.Uniform(setup.max_key());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported benchmark pattern" <<
setup.ToString(setup.pattern);
}
char key_buf[sizeof(int_key)];
memcpy(key_buf, &int_key, sizeof(int_key));
@@ -142,8 +169,12 @@ class CacheBench : public KuduTest,
if (h) {
++hits;
} else {
+ int entry_size = kEntrySize;
+ if (setup.pattern ==
BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS && !frequent) {
+ entry_size = 10000 * kEntrySize;
+ }
auto ph(cache_->Allocate(
- key_slice, /* val_len=*/kEntrySize, /* charge=*/kEntrySize));
+ key_slice, /* val_len=*/entry_size, /* charge=*/entry_size));
cache_->Insert(std::move(ph), nullptr);
}
++lookups;
@@ -153,14 +184,16 @@ class CacheBench : public KuduTest,
// Starts the given number of threads to concurrently call DoQueries.
// Returns the aggregated number of cache hits and lookups.
- pair<int64_t, int64_t> RunQueryThreads(int n_threads, int n_seconds) {
+ pair<int64_t, int64_t> RunQueryThreads(int n_threads, int n_seconds,
uint32_t large_number) {
vector<thread> threads(n_threads);
atomic<bool> done(false);
atomic<int64_t> total_lookups(0);
atomic<int64_t> total_hits(0);
+ bool frequent;
for (int i = 0; i < n_threads; i++) {
- threads[i] = thread([&]() {
- pair<int64_t, int64_t> hits_lookups = DoQueries(&done);
+ frequent = i % 2 == 0;
+ threads[i] = thread([&, frequent]() {
+ pair<int64_t, int64_t> hits_lookups = DoQueries(&done, frequent,
large_number);
total_hits += hits_lookups.first;
total_lookups += hits_lookups.second;
});
@@ -187,20 +220,29 @@ INSTANTIATE_TEST_SUITE_P(Patterns, CacheBench,
testing::ValuesIn(std::vector<Ben
{BenchSetup::Pattern::UNIFORM, 1.0, Cache::EvictionPolicy::LRU},
{BenchSetup::Pattern::UNIFORM, 1.0, Cache::EvictionPolicy::SLRU},
{BenchSetup::Pattern::UNIFORM, 3.0, Cache::EvictionPolicy::LRU},
- {BenchSetup::Pattern::UNIFORM, 3.0, Cache::EvictionPolicy::SLRU}
+ {BenchSetup::Pattern::UNIFORM, 3.0, Cache::EvictionPolicy::SLRU},
+ {BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS, 1.0,
Cache::EvictionPolicy::LRU},
+ {BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS, 1.0,
Cache::EvictionPolicy::SLRU},
+ {BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS, 3.0,
Cache::EvictionPolicy::LRU},
+ {BenchSetup::Pattern::PRE_DETERMINED_FREQUENT_LOOKUPS, 3.0,
Cache::EvictionPolicy::SLRU}
}));
TEST_P(CacheBench, RunBench) {
const BenchSetup& setup = GetParam();
- // Run a short warmup phase to try to populate the cache. Otherwise even if
the
+ Random r(GetRandomSeed32());
+ uint32_t large_number_max = setup.max_key() / kMaxMultiplier;
+ uint32_t large_number = r.Uniform(large_number_max);
+
+ // Run a short warmup phase to try to populate the cache. Otherwise, even if
the
// dataset is smaller than the cache capacity, we would count a bunch of
misses
// during the warm-up phase.
LOG(INFO) << "Warming up...";
- RunQueryThreads(FLAGS_num_threads, 1);
+ RunQueryThreads(FLAGS_num_threads, 1, large_number);
LOG(INFO) << "Running benchmark...";
- pair<int64_t, int64_t> hits_lookups = RunQueryThreads(FLAGS_num_threads,
FLAGS_run_seconds);
+ pair<int64_t, int64_t> hits_lookups = RunQueryThreads(FLAGS_num_threads,
FLAGS_run_seconds,
+ large_number);
int64_t hits = hits_lookups.first;
int64_t lookups = hits_lookups.second;