This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 622683201 KUDU-613: Make segment metrics more relevant
622683201 is described below
commit 622683201dc1b5c1b056d84565ee6cae8964ddd2
Author: Mahesh Reddy <[email protected]>
AuthorDate: Thu Dec 19 18:13:14 2024 -0500
KUDU-613: Make segment metrics more relevant
This patch removes the segment-level lookup metrics
along with the upgrade and downgrade counter metrics.
It instead adds histogram metrics to keep track of
the stats of upgrades and downgrades per entry. The
total number of upgrades and downgrades for the cache
can be derived from these new metrics using the field
'total_count'.
Counters to keep track of upgrades and downgrades
are also added on the handle level.
Change-Id: Id44828be41a19593e6808debf7e9ba8e1fc4dcca
Reviewed-on: http://gerrit.cloudera.org:8080/22253
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/util/block_cache_metrics.cc | 87 +++++-------------------------------
src/kudu/util/cache_metrics.h | 16 +------
src/kudu/util/slru_cache.cc | 69 +++++++++-------------------
src/kudu/util/slru_cache.h | 10 +++--
4 files changed, 39 insertions(+), 143 deletions(-)
diff --git a/src/kudu/util/block_cache_metrics.cc
b/src/kudu/util/block_cache_metrics.cc
index c6a626dd4..6287763a9 100644
--- a/src/kudu/util/block_cache_metrics.cc
+++ b/src/kudu/util/block_cache_metrics.cc
@@ -59,53 +59,23 @@ METRIC_DEFINE_gauge_uint64(server, block_cache_usage,
"Block Cache Memory Usage"
"Memory consumed by the block cache",
kudu::MetricLevel::kInfo);
-METRIC_DEFINE_counter(server, block_cache_upgrades,
- "Block Cache Upgrades", kudu::MetricUnit::kBlocks,
- "Number of blocks upgraded from the probationary segment
to "
- "the protected segment of the block cache",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_downgrades,
- "Block Cache downgrades", kudu::MetricUnit::kBlocks,
- "Number of blocks downgraded from the protected segment
to "
- "the probationary segment of the block cache",
- kudu::MetricLevel::kDebug);
+METRIC_DEFINE_histogram(server, block_cache_upgrades_stats,
+ "Block Cache Upgrades Stats",
kudu::MetricUnit::kBlocks,
+ "Histogram of the number of times an entry has been
upgraded",
+ kudu::MetricLevel::kDebug, 1000000, 2);
+METRIC_DEFINE_histogram(server, block_cache_downgrades_stats,
+ "Block Cache Downgrades Stats",
kudu::MetricUnit::kBlocks,
+ "Histogram of the number of times an entry has been
downgraded",
+ kudu::MetricLevel::kDebug, 1000000, 2);
METRIC_DEFINE_counter(server, block_cache_probationary_segment_inserts,
"Block Cache Probationary Segment Inserts",
kudu::MetricUnit::kBlocks,
"Number of blocks inserted in the probationary segment
of the cache",
kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_lookups,
- "Block Cache Probationary Segment Lookups",
kudu::MetricUnit::kBlocks,
- "Number of blocks looked up from the probationary
segment of the cache",
- kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, block_cache_probationary_segment_evictions,
"Block Cache Probationary Segment Evictions",
kudu::MetricUnit::kBlocks,
"Number of blocks evicted from the probationary segment
of the cache",
kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses,
- "Block Cache Probationary Segment Misses",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the probationary segment that
didn't yield a block",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses_caching,
- "Block Cache Probationary Segment Misses (Caching)",
- kudu::MetricUnit::kBlocks,
- "Number of lookups in the probationary segment that were
expecting a block "
- "that didn't yield one. Use this number instead of "
- "block_cache_probationary_segment_misses when trying to
determine how "
- "efficient the probationary segment is",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_hits,
- "Block Cache Probationary Segment Hits",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the probationary segment that
found a block",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_hits_caching,
- "Block Cache Probationary Segment Hits (Caching)",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the probationary segment that were
expecting a block "
- "that found one. Use this number instead of "
- "block_cache_probationary_segment_hits when trying to
determine "
- "how efficient the probationary segment is",
- kudu::MetricLevel::kDebug);
-
METRIC_DEFINE_gauge_uint64(server, block_cache_probationary_segment_usage,
"Block Cache Probationary Segment Memory Usage",
kudu::MetricUnit::kBytes,
@@ -116,36 +86,10 @@ METRIC_DEFINE_counter(server,
block_cache_protected_segment_inserts,
"Block Cache Protected Segment Inserts",
kudu::MetricUnit::kBlocks,
"Number of blocks inserted in the protected segment of
the cache",
kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_lookups,
- "Block Cache Protected Segment Lookups",
kudu::MetricUnit::kBlocks,
- "Number of blocks looked up from the protected segment
of the cache",
- kudu::MetricLevel::kDebug);
METRIC_DEFINE_counter(server, block_cache_protected_segment_evictions,
"Block Cache Protected Segment Evictions",
kudu::MetricUnit::kBlocks,
"Number of blocks evicted from the protected segment of
the cache",
kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_misses,
- "Block Cache Protected Segment Misses",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the protected segment that didn't
yield a block",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_misses_caching,
- "Block Cache Protected Segment Misses (Caching)",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the protected segment that were
expecting a block that "
- "didn't yield one. Use this number instead of "
- "block_cache_protected_segment_misses when trying to
determine "
- "how efficient the protected segment is",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_hits,
- "Block Cache Protected Segment Hits",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the protected segment that found a
block",
- kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_hits_caching,
- "Block Cache Protected Segment Hits (Caching)",
kudu::MetricUnit::kBlocks,
- "Number of lookups in the protected segment that were
expecting a block that "
- "found one. Use this number instead of
block_cache_protected_segment_hits "
- "when trying to determine how efficient the protected
segment is",
- kudu::MetricLevel::kDebug);
-
METRIC_DEFINE_gauge_uint64(server, block_cache_protected_segment_usage,
"Block Cache Protected Segment Memory Usage",
kudu::MetricUnit::kBytes,
"Memory consumed by the protected segment of the
block cache",
@@ -176,25 +120,14 @@ SLRUCacheMetrics::SLRUCacheMetrics(const
scoped_refptr<MetricEntity>& entity) {
MINIT(cache_misses_caching, block_cache_misses_caching);
GINIT(cache_usage, block_cache_usage);
- MINIT(upgrades, block_cache_upgrades);
- MINIT(downgrades, block_cache_downgrades);
+ MINIT(upgrades_stats, block_cache_upgrades_stats);
+ MINIT(downgrades_stats, block_cache_downgrades_stats);
MINIT(probationary_segment_inserts,
block_cache_probationary_segment_inserts);
- MINIT(probationary_segment_lookups,
block_cache_probationary_segment_lookups);
MINIT(probationary_segment_evictions,
block_cache_probationary_segment_evictions);
- MINIT(probationary_segment_cache_hits,
block_cache_probationary_segment_hits);
- MINIT(probationary_segment_cache_hits_caching,
block_cache_probationary_segment_hits_caching);
- MINIT(probationary_segment_cache_misses,
block_cache_probationary_segment_misses);
- MINIT(probationary_segment_cache_misses_caching,
block_cache_probationary_segment_misses_caching);
GINIT(probationary_segment_cache_usage,
block_cache_probationary_segment_usage);
-
MINIT(protected_segment_inserts, block_cache_protected_segment_inserts);
- MINIT(protected_segment_lookups, block_cache_protected_segment_lookups);
MINIT(protected_segment_evictions, block_cache_protected_segment_evictions);
- MINIT(protected_segment_cache_hits, block_cache_protected_segment_hits);
- MINIT(protected_segment_cache_hits_caching,
block_cache_protected_segment_hits_caching);
- MINIT(protected_segment_cache_misses, block_cache_protected_segment_misses);
- MINIT(protected_segment_cache_misses_caching,
block_cache_protected_segment_misses_caching);
GINIT(protected_segment_cache_usage, block_cache_protected_segment_usage);
}
#undef MINIT
diff --git a/src/kudu/util/cache_metrics.h b/src/kudu/util/cache_metrics.h
index 018880a3a..a97a674c1 100644
--- a/src/kudu/util/cache_metrics.h
+++ b/src/kudu/util/cache_metrics.h
@@ -43,27 +43,15 @@ struct CacheMetrics {
struct SLRUCacheMetrics : public CacheMetrics {
explicit SLRUCacheMetrics(const scoped_refptr<MetricEntity>& entity);
- scoped_refptr<Counter> upgrades;
- scoped_refptr<Counter> downgrades;
+ scoped_refptr<Histogram> upgrades_stats;
+ scoped_refptr<Histogram> downgrades_stats;
scoped_refptr<Counter> probationary_segment_inserts;
- scoped_refptr<Counter> probationary_segment_lookups;
scoped_refptr<Counter> probationary_segment_evictions;
- scoped_refptr<Counter> probationary_segment_cache_hits;
- scoped_refptr<Counter> probationary_segment_cache_hits_caching;
- scoped_refptr<Counter> probationary_segment_cache_misses;
- scoped_refptr<Counter> probationary_segment_cache_misses_caching;
-
scoped_refptr<AtomicGauge<uint64_t>> probationary_segment_cache_usage;
scoped_refptr<Counter> protected_segment_inserts;
- scoped_refptr<Counter> protected_segment_lookups;
scoped_refptr<Counter> protected_segment_evictions;
- scoped_refptr<Counter> protected_segment_cache_hits;
- scoped_refptr<Counter> protected_segment_cache_hits_caching;
- scoped_refptr<Counter> protected_segment_cache_misses;
- scoped_refptr<Counter> protected_segment_cache_misses_caching;
-
scoped_refptr<AtomicGauge<uint64_t>> protected_segment_cache_usage;
};
diff --git a/src/kudu/util/slru_cache.cc b/src/kudu/util/slru_cache.cc
index 021f97c9a..c77901d08 100644
--- a/src/kudu/util/slru_cache.cc
+++ b/src/kudu/util/slru_cache.cc
@@ -142,47 +142,6 @@ void SLRUCacheShard<segment>::UpdateMetricsLookup(bool
was_hit, bool caching) {
}
}
-template<>
-void SLRUCacheShard<Segment::kProbationary>::UpdateSegmentMetricsLookup(bool
was_hit,
- bool
caching) {
- if (PREDICT_TRUE(metrics_)) {
- metrics_->probationary_segment_lookups->Increment();
- if (was_hit) {
- if (caching) {
- metrics_->probationary_segment_cache_hits_caching->Increment();
- } else {
- metrics_->probationary_segment_cache_hits->Increment();
- }
- } else {
- if (caching) {
- metrics_->probationary_segment_cache_misses_caching->Increment();
- } else {
- metrics_->probationary_segment_cache_misses->Increment();
- }
- }
- }
-}
-
-template<>
-void SLRUCacheShard<Segment::kProtected>::UpdateSegmentMetricsLookup(bool
was_hit, bool caching) {
- if (PREDICT_TRUE(metrics_)) {
- metrics_->protected_segment_lookups->Increment();
- if (was_hit) {
- if (caching) {
- metrics_->protected_segment_cache_hits_caching->Increment();
- } else {
- metrics_->protected_segment_cache_hits->Increment();
- }
- } else {
- if (caching) {
- metrics_->protected_segment_cache_misses_caching->Increment();
- } else {
- metrics_->protected_segment_cache_misses->Increment();
- }
- }
- }
-}
-
template<Segment segment>
void SLRUCacheShard<segment>::RL_Remove(SLRUHandle* e) {
e->next->prev = e->prev;
@@ -211,14 +170,13 @@ void
SLRUCacheShard<segment>::RL_UpdateAfterLookup(SLRUHandle* e) {
// shards and its tables are protected by mutexes. Same logic applies to the
all the below
// methods in SLRUCacheShard.
template<Segment segment>
-Handle* SLRUCacheShard<segment>::Lookup(const Slice& key, uint32_t hash, bool
caching) {
+Handle* SLRUCacheShard<segment>::Lookup(const Slice& key, uint32_t hash) {
SLRUHandle* e = table_.Lookup(key, hash);
if (e != nullptr) {
e->refs.fetch_add(1, std::memory_order_relaxed);
e->lookups++;
RL_UpdateAfterLookup(e);
}
- UpdateSegmentMetricsLookup(e != nullptr, caching);
return reinterpret_cast<Handle*>(e);
}
@@ -276,6 +234,11 @@ Handle*
SLRUCacheShard<Segment::kProbationary>::Insert(SLRUHandle* handle,
old_entry->next = *free_entries;
*free_entries = old_entry;
}
+
+ // In the update case, a new entry is inserted, but it should retain the
same values for the
+ // fields 'upgrades' and 'downgrades' as the old entry.
+ handle->upgrades = old_entry->upgrades;
+ handle->downgrades = old_entry->downgrades;
}
RemoveEntriesPastCapacity(free_entries);
@@ -312,6 +275,11 @@ Handle*
SLRUCacheShard<Segment::kProtected>::Insert(SLRUHandle* handle,
*free_entries = old_entry;
}
+ // In the update case, a new entry is inserted, but it should retain the
same values for the
+ // fields 'upgrades' and 'downgrades' as the old entry.
+ handle->upgrades = old_entry->upgrades;
+ handle->downgrades = old_entry->downgrades;
+
return reinterpret_cast<Handle*>(handle);
}
@@ -320,8 +288,9 @@ template<>
void SLRUCacheShard<Segment::kProbationary>::ReInsert(SLRUHandle* handle,
SLRUHandle**
free_entries) {
handle->in_protected_segment.store(false, std::memory_order_relaxed);
+ handle->downgrades++;
if (PREDICT_TRUE(metrics_)) {
- metrics_->downgrades->Increment();
+ metrics_->downgrades_stats->Increment(handle->downgrades);
metrics_->probationary_segment_cache_usage->IncrementBy(handle->charge);
metrics_->probationary_segment_inserts->Increment();
}
@@ -340,8 +309,9 @@ template<>
void SLRUCacheShard<Segment::kProtected>::ReInsert(SLRUHandle* handle,
SLRUHandle**
/*free_entries*/) {
handle->in_protected_segment.store(true, std::memory_order_relaxed);
+ handle->upgrades++;
if (PREDICT_TRUE(metrics_)) {
- metrics_->upgrades->Increment();
+ metrics_->upgrades_stats->Increment(handle->upgrades);
metrics_->protected_segment_cache_usage->IncrementBy(handle->charge);
metrics_->protected_segment_inserts->Increment();
}
@@ -450,22 +420,21 @@ Handle* SLRUCacheShardPair::Lookup(const Slice& key,
uint32_t hash, bool caching
// protected segment, insert them into the probationary
segment.
// - Miss: Return the handle.
//
- // Lookup metrics for both segments and the high-level cache are updated
with each lookup.
+ // Lookup metrics for the cache are updated with each lookup.
SLRUHandle* probationary_free_entries = nullptr;
Handle* probationary_handle;
{
std::lock_guard l(mutex_);
- auto* protected_handle = protected_shard_.Lookup(key, hash, caching);
+ auto* protected_handle = protected_shard_.Lookup(key, hash);
// If the key exists in the protected segment, return the result from the
lookup of the
// protected segment.
if (protected_handle) {
protected_shard_.UpdateMetricsLookup(true, caching);
- probationary_shard_.UpdateSegmentMetricsLookup(false, caching);
return protected_handle;
}
- probationary_handle = probationary_shard_.Lookup(key, hash, caching);
+ probationary_handle = probationary_shard_.Lookup(key, hash);
// Return null handle if handle is not found in either the probationary or
protected segment.
if (!probationary_handle) {
@@ -589,6 +558,8 @@ Cache::UniquePendingHandle ShardedSLRUCache::Allocate(Slice
key, int val_len, in
PendingHandleDeleter(this));
SLRUHandle* handle = reinterpret_cast<SLRUHandle*>(h.get());
handle->lookups = 0;
+ handle->upgrades = 0;
+ handle->downgrades = 0;
handle->in_protected_segment.store(false, std::memory_order_relaxed);
handle->key_length = key_len;
handle->val_length = val_len;
diff --git a/src/kudu/util/slru_cache.h b/src/kudu/util/slru_cache.h
index 502ab273c..f611c65ef 100644
--- a/src/kudu/util/slru_cache.h
+++ b/src/kudu/util/slru_cache.h
@@ -66,6 +66,12 @@ struct SLRUHandle {
// Used for releasing from the right shard in the SLRU cache implementation.
std::atomic<bool> in_protected_segment;
+ // Number of times an entry has been upgraded or downgraded. These fields
are tied to the key,
+ // not the entry itself. For example, when an entry is updated, a new handle
is inserted, but it
+ // retains the values of these fields from the previous entry with the same
key.
+ int32_t upgrades;
+ int32_t downgrades;
+
// The storage for the key/value pair itself. The data is stored as:
// [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
uint8_t kv_data[1]; // Beginning of key/value pair
@@ -126,7 +132,7 @@ class SLRUCacheShard {
// See comments on template specialization for each function for more
details.
void ReInsert(SLRUHandle* handle, SLRUHandle** free_entries);
// Like SLRUCache::Lookup, but with an extra "hash" parameter.
- Handle* Lookup(const Slice& key, uint32_t hash, bool caching);
+ Handle* Lookup(const Slice& key, uint32_t hash);
// Reduces the entry's ref by one, frees the entry if no refs are remaining.
void Release(Handle* handle);
// Removes entry from shard, returns it to be freed if no refs are remaining.
@@ -139,8 +145,6 @@ class SLRUCacheShard {
bool Contains(const Slice& key, uint32_t hash);
// Update the high-level metrics for a lookup operation.
void UpdateMetricsLookup(bool was_hit, bool caching);
- // Update the segment-level metrics for a lookup operation.
- void UpdateSegmentMetricsLookup(bool was_hit, bool caching);
private:
friend class SLRUCacheShardPair;