This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.18.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.18.x by this push:
     new a74ba6d8c KUDU-613: Make segment metrics more relevant
a74ba6d8c is described below

commit a74ba6d8c94cf9223e83bf258c765e88868df358
Author: Mahesh Reddy <[email protected]>
AuthorDate: Thu Dec 19 18:13:14 2024 -0500

    KUDU-613: Make segment metrics more relevant
    
    This patch removes the segment-level lookup metrics
    along with the upgrade and downgrade counter metrics.
    
    It instead adds histogram metrics to keep track of
    the stats of upgrades and downgrades per entry. The
    total number of upgrades and downgrades for the cache
    can be derived from these new metrics using the field
    'total_count'.
    
    Counters to keep track of upgrades and downgrades
    are also added on the handle level.
    
    Change-Id: Id44828be41a19593e6808debf7e9ba8e1fc4dcca
    Reviewed-on: http://gerrit.cloudera.org:8080/22253
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
    (cherry picked from commit 622683201dc1b5c1b056d84565ee6cae8964ddd2)
    Reviewed-on: http://gerrit.cloudera.org:8080/22335
    Reviewed-by: Abhishek Chennaka <[email protected]>
---
 src/kudu/util/block_cache_metrics.cc | 87 +++++-------------------------------
 src/kudu/util/cache_metrics.h        | 16 +------
 src/kudu/util/slru_cache.cc          | 69 +++++++++-------------------
 src/kudu/util/slru_cache.h           | 10 +++--
 4 files changed, 39 insertions(+), 143 deletions(-)

diff --git a/src/kudu/util/block_cache_metrics.cc 
b/src/kudu/util/block_cache_metrics.cc
index c6a626dd4..6287763a9 100644
--- a/src/kudu/util/block_cache_metrics.cc
+++ b/src/kudu/util/block_cache_metrics.cc
@@ -59,53 +59,23 @@ METRIC_DEFINE_gauge_uint64(server, block_cache_usage, 
"Block Cache Memory Usage"
                            "Memory consumed by the block cache",
                            kudu::MetricLevel::kInfo);
 
-METRIC_DEFINE_counter(server, block_cache_upgrades,
-                      "Block Cache Upgrades", kudu::MetricUnit::kBlocks,
-                      "Number of blocks upgraded from the probationary segment 
to "
-                      "the protected segment of the block cache",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_downgrades,
-                      "Block Cache downgrades", kudu::MetricUnit::kBlocks,
-                      "Number of blocks downgraded from the protected segment 
to "
-                      "the probationary segment of the block cache",
-                      kudu::MetricLevel::kDebug);
+METRIC_DEFINE_histogram(server, block_cache_upgrades_stats,
+                        "Block Cache Upgrades Stats", 
kudu::MetricUnit::kBlocks,
+                        "Histogram of the number of times an entry has been 
upgraded",
+                        kudu::MetricLevel::kDebug, 1000000, 2);
+METRIC_DEFINE_histogram(server, block_cache_downgrades_stats,
+                        "Block Cache Downgrades Stats", 
kudu::MetricUnit::kBlocks,
+                        "Histogram of the number of times an entry has been 
downgraded",
+                        kudu::MetricLevel::kDebug, 1000000, 2);
 
 METRIC_DEFINE_counter(server, block_cache_probationary_segment_inserts,
                       "Block Cache Probationary Segment Inserts", 
kudu::MetricUnit::kBlocks,
                       "Number of blocks inserted in the probationary segment 
of the cache",
                       kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_lookups,
-                      "Block Cache Probationary Segment Lookups", 
kudu::MetricUnit::kBlocks,
-                      "Number of blocks looked up from the probationary 
segment of the cache",
-                      kudu::MetricLevel::kDebug);
 METRIC_DEFINE_counter(server, block_cache_probationary_segment_evictions,
                       "Block Cache Probationary Segment Evictions", 
kudu::MetricUnit::kBlocks,
                       "Number of blocks evicted from the probationary segment 
of the cache",
                       kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses,
-                      "Block Cache Probationary Segment Misses", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the probationary segment that 
didn't yield a block",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses_caching,
-                      "Block Cache Probationary Segment Misses (Caching)",
-                      kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the probationary segment that were 
expecting a block "
-                      "that didn't yield one. Use this number instead of "
-                      "block_cache_probationary_segment_misses when trying to 
determine how "
-                      "efficient the probationary segment is",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_hits,
-                      "Block Cache Probationary Segment Hits", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the probationary segment that 
found a block",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_probationary_segment_hits_caching,
-                      "Block Cache Probationary Segment Hits (Caching)", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the probationary segment that were 
expecting a block "
-                      "that found one. Use this number instead of "
-                      "block_cache_probationary_segment_hits when trying to 
determine "
-                      "how efficient the probationary segment is",
-                      kudu::MetricLevel::kDebug);
-
 METRIC_DEFINE_gauge_uint64(server, block_cache_probationary_segment_usage,
                            "Block Cache Probationary Segment Memory Usage",
                            kudu::MetricUnit::kBytes,
@@ -116,36 +86,10 @@ METRIC_DEFINE_counter(server, 
block_cache_protected_segment_inserts,
                       "Block Cache Protected Segment Inserts", 
kudu::MetricUnit::kBlocks,
                       "Number of blocks inserted in the protected segment of 
the cache",
                       kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_lookups,
-                      "Block Cache Protected Segment Lookups", 
kudu::MetricUnit::kBlocks,
-                      "Number of blocks looked up from the protected segment 
of the cache",
-                      kudu::MetricLevel::kDebug);
 METRIC_DEFINE_counter(server, block_cache_protected_segment_evictions,
                       "Block Cache Protected Segment Evictions", 
kudu::MetricUnit::kBlocks,
                       "Number of blocks evicted from the protected segment of 
the cache",
                       kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_misses,
-                      "Block Cache Protected Segment Misses", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the protected segment that didn't 
yield a block",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_misses_caching,
-                      "Block Cache Protected Segment Misses (Caching)", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the protected segment that were 
expecting a block that "
-                      "didn't yield one. Use this number instead of "
-                      "block_cache_protected_segment_misses when trying to 
determine "
-                      "how efficient the protected segment is",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_hits,
-                      "Block Cache Protected Segment Hits", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the protected segment that found a 
block",
-                      kudu::MetricLevel::kDebug);
-METRIC_DEFINE_counter(server, block_cache_protected_segment_hits_caching,
-                      "Block Cache Protected Segment Hits (Caching)", 
kudu::MetricUnit::kBlocks,
-                      "Number of lookups in the protected segment that were 
expecting a block that "
-                      "found one. Use this number instead of 
block_cache_protected_segment_hits "
-                      "when trying to determine how efficient the protected 
segment is",
-                      kudu::MetricLevel::kDebug);
-
 METRIC_DEFINE_gauge_uint64(server, block_cache_protected_segment_usage,
                            "Block Cache Protected Segment Memory Usage", 
kudu::MetricUnit::kBytes,
                            "Memory consumed by the protected segment of the 
block cache",
@@ -176,25 +120,14 @@ SLRUCacheMetrics::SLRUCacheMetrics(const 
scoped_refptr<MetricEntity>& entity) {
   MINIT(cache_misses_caching, block_cache_misses_caching);
   GINIT(cache_usage, block_cache_usage);
 
-  MINIT(upgrades, block_cache_upgrades);
-  MINIT(downgrades, block_cache_downgrades);
+  MINIT(upgrades_stats, block_cache_upgrades_stats);
+  MINIT(downgrades_stats, block_cache_downgrades_stats);
 
   MINIT(probationary_segment_inserts, 
block_cache_probationary_segment_inserts);
-  MINIT(probationary_segment_lookups, 
block_cache_probationary_segment_lookups);
   MINIT(probationary_segment_evictions, 
block_cache_probationary_segment_evictions);
-  MINIT(probationary_segment_cache_hits, 
block_cache_probationary_segment_hits);
-  MINIT(probationary_segment_cache_hits_caching, 
block_cache_probationary_segment_hits_caching);
-  MINIT(probationary_segment_cache_misses, 
block_cache_probationary_segment_misses);
-  MINIT(probationary_segment_cache_misses_caching, 
block_cache_probationary_segment_misses_caching);
   GINIT(probationary_segment_cache_usage, 
block_cache_probationary_segment_usage);
-
   MINIT(protected_segment_inserts, block_cache_protected_segment_inserts);
-  MINIT(protected_segment_lookups, block_cache_protected_segment_lookups);
   MINIT(protected_segment_evictions, block_cache_protected_segment_evictions);
-  MINIT(protected_segment_cache_hits, block_cache_protected_segment_hits);
-  MINIT(protected_segment_cache_hits_caching, 
block_cache_protected_segment_hits_caching);
-  MINIT(protected_segment_cache_misses, block_cache_protected_segment_misses);
-  MINIT(protected_segment_cache_misses_caching, 
block_cache_protected_segment_misses_caching);
   GINIT(protected_segment_cache_usage, block_cache_protected_segment_usage);
 }
 #undef MINIT
diff --git a/src/kudu/util/cache_metrics.h b/src/kudu/util/cache_metrics.h
index 018880a3a..a97a674c1 100644
--- a/src/kudu/util/cache_metrics.h
+++ b/src/kudu/util/cache_metrics.h
@@ -43,27 +43,15 @@ struct CacheMetrics {
 struct SLRUCacheMetrics : public CacheMetrics {
   explicit SLRUCacheMetrics(const scoped_refptr<MetricEntity>& entity);
 
-  scoped_refptr<Counter> upgrades;
-  scoped_refptr<Counter> downgrades;
+  scoped_refptr<Histogram> upgrades_stats;
+  scoped_refptr<Histogram> downgrades_stats;
 
   scoped_refptr<Counter> probationary_segment_inserts;
-  scoped_refptr<Counter> probationary_segment_lookups;
   scoped_refptr<Counter> probationary_segment_evictions;
-  scoped_refptr<Counter> probationary_segment_cache_hits;
-  scoped_refptr<Counter> probationary_segment_cache_hits_caching;
-  scoped_refptr<Counter> probationary_segment_cache_misses;
-  scoped_refptr<Counter> probationary_segment_cache_misses_caching;
-
   scoped_refptr<AtomicGauge<uint64_t>> probationary_segment_cache_usage;
 
   scoped_refptr<Counter> protected_segment_inserts;
-  scoped_refptr<Counter> protected_segment_lookups;
   scoped_refptr<Counter> protected_segment_evictions;
-  scoped_refptr<Counter> protected_segment_cache_hits;
-  scoped_refptr<Counter> protected_segment_cache_hits_caching;
-  scoped_refptr<Counter> protected_segment_cache_misses;
-  scoped_refptr<Counter> protected_segment_cache_misses_caching;
-
   scoped_refptr<AtomicGauge<uint64_t>> protected_segment_cache_usage;
 };
 
diff --git a/src/kudu/util/slru_cache.cc b/src/kudu/util/slru_cache.cc
index 021f97c9a..c77901d08 100644
--- a/src/kudu/util/slru_cache.cc
+++ b/src/kudu/util/slru_cache.cc
@@ -142,47 +142,6 @@ void SLRUCacheShard<segment>::UpdateMetricsLookup(bool 
was_hit, bool caching) {
   }
 }
 
-template<>
-void SLRUCacheShard<Segment::kProbationary>::UpdateSegmentMetricsLookup(bool 
was_hit,
-                                                                        bool 
caching) {
-  if (PREDICT_TRUE(metrics_)) {
-    metrics_->probationary_segment_lookups->Increment();
-    if (was_hit) {
-      if (caching) {
-        metrics_->probationary_segment_cache_hits_caching->Increment();
-      } else {
-        metrics_->probationary_segment_cache_hits->Increment();
-      }
-    } else {
-      if (caching) {
-        metrics_->probationary_segment_cache_misses_caching->Increment();
-      } else {
-        metrics_->probationary_segment_cache_misses->Increment();
-      }
-    }
-  }
-}
-
-template<>
-void SLRUCacheShard<Segment::kProtected>::UpdateSegmentMetricsLookup(bool 
was_hit, bool caching) {
-  if (PREDICT_TRUE(metrics_)) {
-    metrics_->protected_segment_lookups->Increment();
-    if (was_hit) {
-      if (caching) {
-        metrics_->protected_segment_cache_hits_caching->Increment();
-      } else {
-        metrics_->protected_segment_cache_hits->Increment();
-      }
-    } else {
-      if (caching) {
-        metrics_->protected_segment_cache_misses_caching->Increment();
-      } else {
-        metrics_->protected_segment_cache_misses->Increment();
-      }
-    }
-  }
-}
-
 template<Segment segment>
 void SLRUCacheShard<segment>::RL_Remove(SLRUHandle* e) {
   e->next->prev = e->prev;
@@ -211,14 +170,13 @@ void 
SLRUCacheShard<segment>::RL_UpdateAfterLookup(SLRUHandle* e) {
 // shards and its tables are protected by mutexes. Same logic applies to the 
all the below
 // methods in SLRUCacheShard.
 template<Segment segment>
-Handle* SLRUCacheShard<segment>::Lookup(const Slice& key, uint32_t hash, bool 
caching) {
+Handle* SLRUCacheShard<segment>::Lookup(const Slice& key, uint32_t hash) {
   SLRUHandle* e = table_.Lookup(key, hash);
   if (e != nullptr) {
     e->refs.fetch_add(1, std::memory_order_relaxed);
     e->lookups++;
     RL_UpdateAfterLookup(e);
   }
-  UpdateSegmentMetricsLookup(e != nullptr, caching);
 
   return reinterpret_cast<Handle*>(e);
 }
@@ -276,6 +234,11 @@ Handle* 
SLRUCacheShard<Segment::kProbationary>::Insert(SLRUHandle* handle,
       old_entry->next = *free_entries;
       *free_entries = old_entry;
     }
+
+    // In the update case, a new entry is inserted, but it should retain the 
same values for the
+    // fields 'upgrades' and 'downgrades' as the old entry.
+    handle->upgrades = old_entry->upgrades;
+    handle->downgrades = old_entry->downgrades;
   }
   RemoveEntriesPastCapacity(free_entries);
 
@@ -312,6 +275,11 @@ Handle* 
SLRUCacheShard<Segment::kProtected>::Insert(SLRUHandle* handle,
     *free_entries = old_entry;
   }
 
+  // In the update case, a new entry is inserted, but it should retain the 
same values for the
+  // fields 'upgrades' and 'downgrades' as the old entry.
+  handle->upgrades = old_entry->upgrades;
+  handle->downgrades = old_entry->downgrades;
+
   return reinterpret_cast<Handle*>(handle);
 }
 
@@ -320,8 +288,9 @@ template<>
 void SLRUCacheShard<Segment::kProbationary>::ReInsert(SLRUHandle* handle,
                                                       SLRUHandle** 
free_entries) {
   handle->in_protected_segment.store(false, std::memory_order_relaxed);
+  handle->downgrades++;
   if (PREDICT_TRUE(metrics_)) {
-    metrics_->downgrades->Increment();
+    metrics_->downgrades_stats->Increment(handle->downgrades);
     metrics_->probationary_segment_cache_usage->IncrementBy(handle->charge);
     metrics_->probationary_segment_inserts->Increment();
   }
@@ -340,8 +309,9 @@ template<>
 void SLRUCacheShard<Segment::kProtected>::ReInsert(SLRUHandle* handle,
                                                    SLRUHandle** 
/*free_entries*/) {
   handle->in_protected_segment.store(true, std::memory_order_relaxed);
+  handle->upgrades++;
   if (PREDICT_TRUE(metrics_)) {
-    metrics_->upgrades->Increment();
+    metrics_->upgrades_stats->Increment(handle->upgrades);
     metrics_->protected_segment_cache_usage->IncrementBy(handle->charge);
     metrics_->protected_segment_inserts->Increment();
   }
@@ -450,22 +420,21 @@ Handle* SLRUCacheShardPair::Lookup(const Slice& key, 
uint32_t hash, bool caching
   //                protected segment, insert them into the probationary 
segment.
   //      - Miss: Return the handle.
   //
-  // Lookup metrics for both segments and the high-level cache are updated 
with each lookup.
+  // Lookup metrics for the cache are updated with each lookup.
 
   SLRUHandle* probationary_free_entries = nullptr;
   Handle* probationary_handle;
   {
     std::lock_guard l(mutex_);
-    auto* protected_handle = protected_shard_.Lookup(key, hash, caching);
+    auto* protected_handle = protected_shard_.Lookup(key, hash);
 
     // If the key exists in the protected segment, return the result from the 
lookup of the
     // protected segment.
     if (protected_handle) {
       protected_shard_.UpdateMetricsLookup(true, caching);
-      probationary_shard_.UpdateSegmentMetricsLookup(false, caching);
       return protected_handle;
     }
-    probationary_handle = probationary_shard_.Lookup(key, hash, caching);
+    probationary_handle = probationary_shard_.Lookup(key, hash);
 
     // Return null handle if handle is not found in either the probationary or 
protected segment.
     if (!probationary_handle) {
@@ -589,6 +558,8 @@ Cache::UniquePendingHandle ShardedSLRUCache::Allocate(Slice 
key, int val_len, in
                         PendingHandleDeleter(this));
   SLRUHandle* handle = reinterpret_cast<SLRUHandle*>(h.get());
   handle->lookups = 0;
+  handle->upgrades = 0;
+  handle->downgrades = 0;
   handle->in_protected_segment.store(false, std::memory_order_relaxed);
   handle->key_length = key_len;
   handle->val_length = val_len;
diff --git a/src/kudu/util/slru_cache.h b/src/kudu/util/slru_cache.h
index 502ab273c..f611c65ef 100644
--- a/src/kudu/util/slru_cache.h
+++ b/src/kudu/util/slru_cache.h
@@ -66,6 +66,12 @@ struct SLRUHandle {
   // Used for releasing from the right shard in the SLRU cache implementation.
   std::atomic<bool> in_protected_segment;
 
+  // Number of times an entry has been upgraded or downgraded. These fields 
are tied to the key,
+  // not the entry itself. For example, when an entry is updated, a new handle 
is inserted, but it
+  // retains the values of these fields from the previous entry with the same 
key.
+  int32_t upgrades;
+  int32_t downgrades;
+
   // The storage for the key/value pair itself. The data is stored as:
   //   [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
   uint8_t kv_data[1];   // Beginning of key/value pair
@@ -126,7 +132,7 @@ class SLRUCacheShard {
   // See comments on template specialization for each function for more 
details.
   void ReInsert(SLRUHandle* handle, SLRUHandle** free_entries);
   // Like SLRUCache::Lookup, but with an extra "hash" parameter.
-  Handle* Lookup(const Slice& key, uint32_t hash, bool caching);
+  Handle* Lookup(const Slice& key, uint32_t hash);
   // Reduces the entry's ref by one, frees the entry if no refs are remaining.
   void Release(Handle* handle);
   // Removes entry from shard, returns it to be freed if no refs are remaining.
@@ -139,8 +145,6 @@ class SLRUCacheShard {
   bool Contains(const Slice& key, uint32_t hash);
   // Update the high-level metrics for a lookup operation.
   void UpdateMetricsLookup(bool was_hit, bool caching);
-  // Update the segment-level metrics for a lookup operation.
-  void UpdateSegmentMetricsLookup(bool was_hit, bool caching);
 
  private:
   friend class SLRUCacheShardPair;

Reply via email to