This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 422f130  [master] allow setting BlockCacheMetrics twice
422f130 is described below

commit 422f1305ff9dab5d4bb0446aa6fe1e1c40c5b44c
Author: Andrew Wong <[email protected]>
AuthorDate: Tue Jun 22 18:31:47 2021 -0700

    [master] allow setting BlockCacheMetrics twice
    
    In some cases, the way we call Cache::SetMetrics() isn't threadsafe.
    Typically, we only call this once per cache, but this can be a useful
    call to repeat, e.g. if we're constructing a new server that
    instantiates its own metric entity but uses the same cache. This usually
    doesn't happen since most Caches are tied to a single server. However,
    the BlockCache is special in that it's a singleton, so every
    instantiation of a server in the same process will use the same cache.
    
    This was the cause of KUDU-2165, which was resolved by only allowing the
    first caller to set the metrics. While it prevented a race, this meant
    that subsequently started servers would not correctly register metrics.
    
    I have an upcoming patch that will start two servers from the same
    process -- effectively restarting a master after running a tablet copy
    from the same process as a means to automate the addition of a master to
    a cluster. To allow this, the patch adds an option to force the
    resetting of the cache metrics.
    
    Change-Id: I4bb9c6f382a26654f2d324676506441f370ffe61
    Reviewed-on: http://gerrit.cloudera.org:8080/17636
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/cfile/block_cache.cc            |  5 ++--
 src/kudu/cfile/block_cache.h             |  4 ++-
 src/kudu/master/master-test.cc           | 44 ++++++++++++++++++++++++++++++++
 src/kudu/master/master.cc                |  3 ++-
 src/kudu/master/master_options.cc        |  3 ++-
 src/kudu/master/master_options.h         | 10 ++++++++
 src/kudu/master/table_locations_cache.cc |  2 +-
 src/kudu/util/cache-test.cc              |  2 +-
 src/kudu/util/cache.cc                   | 16 +++++++-----
 src/kudu/util/cache.h                    | 18 ++++++++++++-
 src/kudu/util/file_cache.cc              |  2 +-
 src/kudu/util/nvm_cache.cc               |  8 +++++-
 src/kudu/util/ttl_cache.h                |  2 +-
 13 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/src/kudu/cfile/block_cache.cc b/src/kudu/cfile/block_cache.cc
index ab867dc..9da714c 100644
--- a/src/kudu/cfile/block_cache.cc
+++ b/src/kudu/cfile/block_cache.cc
@@ -162,9 +162,10 @@ void BlockCache::Insert(BlockCache::PendingEntry* entry, 
BlockCacheHandle* inser
   inserted->SetHandle(std::move(h));
 }
 
-void BlockCache::StartInstrumentation(const scoped_refptr<MetricEntity>& 
metric_entity) {
+void BlockCache::StartInstrumentation(const scoped_refptr<MetricEntity>& 
metric_entity,
+                                      Cache::ExistingMetricsPolicy 
metrics_policy) {
   std::unique_ptr<BlockCacheMetrics> metrics(new 
BlockCacheMetrics(metric_entity));
-  cache_->SetMetrics(std::move(metrics));
+  cache_->SetMetrics(std::move(metrics), metrics_policy);
 }
 
 } // namespace cfile
diff --git a/src/kudu/cfile/block_cache.h b/src/kudu/cfile/block_cache.h
index deba090..a288fa5 100644
--- a/src/kudu/cfile/block_cache.h
+++ b/src/kudu/cfile/block_cache.h
@@ -133,7 +133,9 @@ class BlockCache {
   // This should be called before the block cache starts serving blocks.
   // Not calling StartInstrumentation will simply result in no block 
cache-related metrics.
   // Calling StartInstrumentation multiple times will reset the metrics each 
time.
-  void StartInstrumentation(const scoped_refptr<MetricEntity>& metric_entity);
+  void StartInstrumentation(
+      const scoped_refptr<MetricEntity>& metric_entity,
+      Cache::ExistingMetricsPolicy metrics_policy = 
Cache::ExistingMetricsPolicy::kKeep);
 
   // Insertion path
   // --------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index c644e4d..33d9f58 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -77,6 +77,7 @@
 #include "kudu/server/rpc_server.h"
 #include "kudu/tablet/metadata.pb.h"
 #include "kudu/util/atomic.h"
+#include "kudu/util/cache.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/env.h"
@@ -125,6 +126,9 @@ DECLARE_bool(mock_table_metrics_for_testing);
 DECLARE_bool(raft_prepare_replacement_before_eviction);
 DECLARE_double(sys_catalog_fail_during_write);
 DECLARE_int32(diagnostics_log_stack_traces_interval_ms);
+DECLARE_int32(flush_threshold_mb);
+DECLARE_int32(flush_threshold_secs);
+DECLARE_int32(flush_upper_bound_ms);
 DECLARE_int32(master_inject_latency_on_tablet_lookups_ms);
 DECLARE_int32(max_table_comment_length);
 DECLARE_int32(rpc_service_queue_length);
@@ -212,6 +216,46 @@ TEST_F(MasterTest, TestShutdownWithoutStart) {
   m.Shutdown();
 }
 
+// Test that ensures that, when specified, restarting a master from within the
+// same process can correctly instantiate proper block cache metrics.
+TEST_F(MasterTest, TestResetBlockCacheMetricsInSameProcess) {
+  mini_master_->Shutdown();
+  // Make sure we flush quickly so we start using the block cache ASAP.
+  FLAGS_flush_threshold_mb = 1;
+  FLAGS_flush_threshold_secs = 1;
+  FLAGS_flush_upper_bound_ms = 0;
+
+  // If implemented incorrectly, since the BlockCache is a singleton, we could
+  // end up with incorrect block cache metrics upon resetting the master
+  // because we register metrics twice. Ensure that's not the case when we
+  // supply the option to reset metrics.
+  mini_master_->mutable_options()->set_block_cache_metrics_policy(
+      Cache::ExistingMetricsPolicy::kReset);
+  ASSERT_OK(mini_master_->Restart());
+
+  // Keep on creating tables until we flush, at which point we'll use the block
+  // cache and increment metrics.
+  // NOTE: we could examine the master's metric_entity() directly, but that
+  // could itself interfere with the reported metrics, e.g. by calling
+  // FindOrCreateCount(). Going through the web UI is more organic anyway.
+  const Schema kTableSchema({ ColumnSchema("key", INT32), ColumnSchema("v", 
STRING) }, 1);
+  constexpr const char* kTablePrefix = "testtb";
+  EasyCurl c;
+  int i = 0;
+  ASSERT_EVENTUALLY([&] {
+      ASSERT_OK(CreateTable(Substitute("$0-$1", kTablePrefix, i++), 
kTableSchema));
+      faststring buf;
+      ASSERT_OK(c.FetchURL(
+          Substitute("http://$0/metrics?ids=kudu.master&metrics=block_cache_inserts",
+                     mini_master_->bound_http_addr().ToString()),
+          &buf));
+      string raw = buf.ToString();
+      // If instrumented correctly, the new master should eventually display
+      // non-zero metrics for the block cache.
+      ASSERT_STR_MATCHES(raw, ".*\"value\": [1-9].*");
+  });
+}
+
 TEST_F(MasterTest, TestRegisterAndHeartbeat) {
   const char* const kTsUUID = "my-ts-uuid";
 
diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc
index 7b9dd47..8ed0929 100644
--- a/src/kudu/master/master.cc
+++ b/src/kudu/master/master.cc
@@ -190,7 +190,8 @@ Master::~Master() {
 Status Master::Init() {
   CHECK_EQ(kStopped, state_);
 
-  cfile::BlockCache::GetSingleton()->StartInstrumentation(metric_entity());
+  cfile::BlockCache::GetSingleton()->StartInstrumentation(
+      metric_entity(), opts_.block_cache_metrics_policy());
 
   
RETURN_NOT_OK(ThreadPoolBuilder("init").set_max_threads(1).Build(&init_pool_));
 
diff --git a/src/kudu/master/master_options.cc 
b/src/kudu/master/master_options.cc
index e07dfbb..f95d2f0 100644
--- a/src/kudu/master/master_options.cc
+++ b/src/kudu/master/master_options.cc
@@ -39,7 +39,8 @@ TAG_FLAG(master_addresses, stable);
 namespace kudu {
 namespace master {
 
-MasterOptions::MasterOptions() {
+MasterOptions::MasterOptions()
+    : block_cache_metrics_policy_(Cache::ExistingMetricsPolicy::kKeep) {
   rpc_opts.default_port = Master::kDefaultPort;
 
   if (!FLAGS_master_addresses.empty()) {
diff --git a/src/kudu/master/master_options.h b/src/kudu/master/master_options.h
index 09b7b0b..b9de110 100644
--- a/src/kudu/master/master_options.h
+++ b/src/kudu/master/master_options.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "kudu/kserver/kserver_options.h"
+#include "kudu/util/cache.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/status.h"
 
@@ -55,8 +56,17 @@ struct MasterOptions : public kserver::KuduServerOptions {
     master_addresses_ = std::move(addresses);
   }
 
+  void set_block_cache_metrics_policy(Cache::ExistingMetricsPolicy b) {
+    block_cache_metrics_policy_ = b;
+  }
+
+  Cache::ExistingMetricsPolicy block_cache_metrics_policy() const {
+    return block_cache_metrics_policy_;
+  }
+
  private:
   std::vector<HostPort> master_addresses_;
+  Cache::ExistingMetricsPolicy block_cache_metrics_policy_;
 };
 
 } // namespace master
diff --git a/src/kudu/master/table_locations_cache.cc 
b/src/kudu/master/table_locations_cache.cc
index a903889..7c82bf4 100644
--- a/src/kudu/master/table_locations_cache.cc
+++ b/src/kudu/master/table_locations_cache.cc
@@ -115,7 +115,7 @@ void TableLocationsCache::Remove(const std::string& 
table_id) {
 
 // Set metrics for the cache.
 void TableLocationsCache::SetMetrics(std::unique_ptr<CacheMetrics> metrics) {
-  cache_->SetMetrics(std::move(metrics));
+  cache_->SetMetrics(std::move(metrics), Cache::ExistingMetricsPolicy::kKeep);
 }
 
 TableLocationsCache::EntryHandle::EntryHandle()
diff --git a/src/kudu/util/cache-test.cc b/src/kudu/util/cache-test.cc
index 88368cb..e96dda7 100644
--- a/src/kudu/util/cache-test.cc
+++ b/src/kudu/util/cache-test.cc
@@ -163,7 +163,7 @@ class CacheBaseTest : public KuduTest,
       scoped_refptr<MetricEntity> entity = METRIC_ENTITY_server.Instantiate(
           &metric_registry_, "test");
       unique_ptr<BlockCacheMetrics> metrics(new BlockCacheMetrics(entity));
-      cache_->SetMetrics(std::move(metrics));
+      cache_->SetMetrics(std::move(metrics), 
Cache::ExistingMetricsPolicy::kKeep);
     }
   }
 
diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc
index 073fa51..72c69bc 100644
--- a/src/kudu/util/cache.cc
+++ b/src/kudu/util/cache.cc
@@ -567,14 +567,16 @@ class ShardedCache : public Cache {
     STLDeleteElements(&shards_);
   }
 
-  void SetMetrics(std::unique_ptr<CacheMetrics> metrics) override {
-    // TODO(KUDU-2165): reuse of the Cache singleton across multiple 
MiniCluster servers
-    // causes TSAN errors. So, we'll ensure that metrics only get attached 
once, from
-    // whichever server starts first. This has the downside that, in test 
builds, we won't
-    // get accurate cache metrics, but that's probably better than spurious 
failures.
+  void SetMetrics(std::unique_ptr<CacheMetrics> metrics,
+                  ExistingMetricsPolicy metrics_policy) override {
     std::lock_guard<decltype(metrics_lock_)> l(metrics_lock_);
-    if (metrics_) {
-      CHECK(IsGTest()) << "Metrics should only be set once per Cache 
singleton";
+    if (metrics_ && metrics_policy == ExistingMetricsPolicy::kKeep) {
+      // KUDU-2165: reuse of the Cache singleton across multiple 
InternalMiniCluster
+      // servers causes TSAN errors. So, we'll ensure that metrics only get
+      // attached once, from whichever server starts first. This has the 
downside
+      // that, in test builds, we won't get accurate cache metrics, but that's
+      // probably better than spurious failures.
+      CHECK(IsGTest()) << "Metrics should only be set once per Cache";
       return;
     }
     metrics_ = std::move(metrics);
diff --git a/src/kudu/util/cache.h b/src/kudu/util/cache.h
index ff3a309..7a2d785 100644
--- a/src/kudu/util/cache.h
+++ b/src/kudu/util/cache.h
@@ -65,8 +65,24 @@ class Cache {
   // function that was passed to the constructor.
   virtual ~Cache();
 
+  // The behavior when calling SetMetrics() when 'metrics_' is already set.
+  enum class ExistingMetricsPolicy {
+    // Calling SetMetrics() again will be a no-op. This is appropriate in tests
+    // that use a singleton cache that is shared across multiple daemons in the
+    // same process, at the cost of not having accurate cache metrics. This is
+    // useful for avoiding races between the destruction of existing metrics
+    // and the setting of new metrics in new daemons. It is expected that this
+    // is only used in tests.
+    kKeep,
+
+    // SetMetrics() will overwrite the existing metrics. It is up to callers to
+    // ensure this is safe, e.g. by destructing the entity that owned the
+    // original metrics.
+    kReset,
+  };
   // Set the cache metrics to update corresponding counters accordingly.
-  virtual void SetMetrics(std::unique_ptr<CacheMetrics> metrics) = 0;
+  virtual void SetMetrics(std::unique_ptr<CacheMetrics> metrics,
+                          ExistingMetricsPolicy metrics_policy) = 0;
 
   // Opaque handle to an entry stored in the cache.
   struct Handle { };
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index c992f2f..6c02467 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -472,7 +472,7 @@ FileCache::FileCache(const string& cache_name,
       running_(1) {
   if (entity) {
     unique_ptr<FileCacheMetrics> metrics(new FileCacheMetrics(entity));
-    cache_->SetMetrics(std::move(metrics));
+    cache_->SetMetrics(std::move(metrics), 
Cache::ExistingMetricsPolicy::kKeep);
   }
   LOG(INFO) << Substitute("Constructed file cache $0 with capacity $1",
                           cache_name, max_open_files);
diff --git a/src/kudu/util/nvm_cache.cc b/src/kudu/util/nvm_cache.cc
index e4f0508..dc7d290 100644
--- a/src/kudu/util/nvm_cache.cc
+++ b/src/kudu/util/nvm_cache.cc
@@ -51,6 +51,7 @@
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
+#include "kudu/util/test_util_prod.h"
 
 #ifndef MEMKIND_PMEM_MIN_SIZE
 #define MEMKIND_PMEM_MIN_SIZE (1024 * 1024 * 16) // Taken from memkind 1.9.0.
@@ -656,7 +657,12 @@ class ShardedLRUCache : public Cache {
     return reinterpret_cast<LRUHandle*>(handle->get())->val_ptr();
   }
 
-  virtual void SetMetrics(unique_ptr<CacheMetrics> metrics) OVERRIDE {
+  virtual void SetMetrics(unique_ptr<CacheMetrics> metrics,
+                          Cache::ExistingMetricsPolicy metrics_policy) 
OVERRIDE {
+    if (metrics_ && metrics_policy == Cache::ExistingMetricsPolicy::kKeep) {
+      CHECK(IsGTest()) << "Metrics should only be set once per Cache";
+      return;
+    }
     metrics_ = std::move(metrics);
     for (const auto& shard : shards_) {
       shard->SetMetrics(metrics_.get());
diff --git a/src/kudu/util/ttl_cache.h b/src/kudu/util/ttl_cache.h
index 1a0c3ee..5ee6233 100644
--- a/src/kudu/util/ttl_cache.h
+++ b/src/kudu/util/ttl_cache.h
@@ -209,7 +209,7 @@ class TTLCache {
   void SetMetrics(std::unique_ptr<TTLCacheMetrics> metrics) {
     // Keep a copy of the pointer to the metrics: the FIFO cache is the owner.
     metrics_ = metrics.get();
-    cache_->SetMetrics(std::move(metrics));
+    cache_->SetMetrics(std::move(metrics), 
Cache::ExistingMetricsPolicy::kKeep);
   }
 
  private:

Reply via email to