This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 422f130 [master] allow setting BlockCacheMetrics twice
422f130 is described below
commit 422f1305ff9dab5d4bb0446aa6fe1e1c40c5b44c
Author: Andrew Wong <[email protected]>
AuthorDate: Tue Jun 22 18:31:47 2021 -0700
[master] allow setting BlockCacheMetrics twice
In some cases, the way we call Cache::SetMetrics() isn't threadsafe.
Typically, we only call this once per cache, but this can be a useful
call to repeat, e.g. if we're constructing a new server that
instantiates its own metric entity but uses the same cache. This usually
doesn't happen since most Caches are tied to a single server. However,
the BlockCache is special in that it's a singleton, so every
instantiation of a server in the same process will use the same cache.
This was the cause of KUDU-2165, which was resolved by only allowing the
first caller to set the metrics. While it prevented a race, this meant
that subsequently started servers would not correctly register metrics.
I have an upcoming patch that will start two servers from the same
process -- effectively restarting a master after running a tablet copy
from the same process as a means to automate the addition of a master to
a cluster. To allow this, this patch adds an option to force the
resetting of the cache metrics.
Change-Id: I4bb9c6f382a26654f2d324676506441f370ffe61
Reviewed-on: http://gerrit.cloudera.org:8080/17636
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/cfile/block_cache.cc | 5 ++--
src/kudu/cfile/block_cache.h | 4 ++-
src/kudu/master/master-test.cc | 44 ++++++++++++++++++++++++++++++++
src/kudu/master/master.cc | 3 ++-
src/kudu/master/master_options.cc | 3 ++-
src/kudu/master/master_options.h | 10 ++++++++
src/kudu/master/table_locations_cache.cc | 2 +-
src/kudu/util/cache-test.cc | 2 +-
src/kudu/util/cache.cc | 16 +++++++-----
src/kudu/util/cache.h | 18 ++++++++++++-
src/kudu/util/file_cache.cc | 2 +-
src/kudu/util/nvm_cache.cc | 8 +++++-
src/kudu/util/ttl_cache.h | 2 +-
13 files changed, 101 insertions(+), 18 deletions(-)
diff --git a/src/kudu/cfile/block_cache.cc b/src/kudu/cfile/block_cache.cc
index ab867dc..9da714c 100644
--- a/src/kudu/cfile/block_cache.cc
+++ b/src/kudu/cfile/block_cache.cc
@@ -162,9 +162,10 @@ void BlockCache::Insert(BlockCache::PendingEntry* entry, BlockCacheHandle* inser
inserted->SetHandle(std::move(h));
}
-void BlockCache::StartInstrumentation(const scoped_refptr<MetricEntity>& metric_entity) {
+void BlockCache::StartInstrumentation(const scoped_refptr<MetricEntity>& metric_entity,
+ Cache::ExistingMetricsPolicy metrics_policy) {
std::unique_ptr<BlockCacheMetrics> metrics(new BlockCacheMetrics(metric_entity));
- cache_->SetMetrics(std::move(metrics));
+ cache_->SetMetrics(std::move(metrics), metrics_policy);
}
} // namespace cfile
diff --git a/src/kudu/cfile/block_cache.h b/src/kudu/cfile/block_cache.h
index deba090..a288fa5 100644
--- a/src/kudu/cfile/block_cache.h
+++ b/src/kudu/cfile/block_cache.h
@@ -133,7 +133,9 @@ class BlockCache {
// This should be called before the block cache starts serving blocks.
// Not calling StartInstrumentation will simply result in no block cache-related metrics.
// Calling StartInstrumentation multiple times will reset the metrics each time.
- void StartInstrumentation(const scoped_refptr<MetricEntity>& metric_entity);
+ void StartInstrumentation(
+ const scoped_refptr<MetricEntity>& metric_entity,
+ Cache::ExistingMetricsPolicy metrics_policy = Cache::ExistingMetricsPolicy::kKeep);
// Insertion path
// --------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index c644e4d..33d9f58 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -77,6 +77,7 @@
#include "kudu/server/rpc_server.h"
#include "kudu/tablet/metadata.pb.h"
#include "kudu/util/atomic.h"
+#include "kudu/util/cache.h"
#include "kudu/util/countdown_latch.h"
#include "kudu/util/curl_util.h"
#include "kudu/util/env.h"
@@ -125,6 +126,9 @@ DECLARE_bool(mock_table_metrics_for_testing);
DECLARE_bool(raft_prepare_replacement_before_eviction);
DECLARE_double(sys_catalog_fail_during_write);
DECLARE_int32(diagnostics_log_stack_traces_interval_ms);
+DECLARE_int32(flush_threshold_mb);
+DECLARE_int32(flush_threshold_secs);
+DECLARE_int32(flush_upper_bound_ms);
DECLARE_int32(master_inject_latency_on_tablet_lookups_ms);
DECLARE_int32(max_table_comment_length);
DECLARE_int32(rpc_service_queue_length);
@@ -212,6 +216,46 @@ TEST_F(MasterTest, TestShutdownWithoutStart) {
m.Shutdown();
}
+// Test that ensures that, when specified, restarting a master from within the
+// same process can correctly instantiate proper block cache metrics.
+TEST_F(MasterTest, TestResetBlockCacheMetricsInSameProcess) {
+ mini_master_->Shutdown();
+ // Make sure we flush quickly so we start using the block cache ASAP.
+ FLAGS_flush_threshold_mb = 1;
+ FLAGS_flush_threshold_secs = 1;
+ FLAGS_flush_upper_bound_ms = 0;
+
+ // If implemented incorrectly, since the BlockCache is a singleton, we could
+ // end up with incorrect block cache metrics upon resetting the master
+ // because we register metrics twice. Ensure that's not the case when we
+ // supply the option to reset metrics.
+ mini_master_->mutable_options()->set_block_cache_metrics_policy(
+ Cache::ExistingMetricsPolicy::kReset);
+ ASSERT_OK(mini_master_->Restart());
+
+ // Keep on creating tables until we flush, at which point we'll use the block
+ // cache and increment metrics.
+ // NOTE: we could examine the master's metric_entity() directly, but that
+ // could itself interfere with the reported metrics, e.g. by calling
+ // FindOrCreateCount(). Going through the web UI is more organic anyway.
+ const Schema kTableSchema({ ColumnSchema("key", INT32), ColumnSchema("v", STRING) }, 1);
+ constexpr const char* kTablePrefix = "testtb";
+ EasyCurl c;
+ int i = 0;
+ ASSERT_EVENTUALLY([&] {
+ ASSERT_OK(CreateTable(Substitute("$0-$1", kTablePrefix, i++), kTableSchema));
+ faststring buf;
+ ASSERT_OK(c.FetchURL(
+ Substitute("http://$0/metrics?ids=kudu.master&metrics=block_cache_inserts",
+ mini_master_->bound_http_addr().ToString()),
+ &buf));
+ string raw = buf.ToString();
+ // If instrumented correctly, the new master should eventually display
+ // non-zero metrics for the block cache.
+ ASSERT_STR_MATCHES(raw, ".*\"value\": [1-9].*");
+ });
+}
+
TEST_F(MasterTest, TestRegisterAndHeartbeat) {
const char* const kTsUUID = "my-ts-uuid";
diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc
index 7b9dd47..8ed0929 100644
--- a/src/kudu/master/master.cc
+++ b/src/kudu/master/master.cc
@@ -190,7 +190,8 @@ Master::~Master() {
Status Master::Init() {
CHECK_EQ(kStopped, state_);
- cfile::BlockCache::GetSingleton()->StartInstrumentation(metric_entity());
+ cfile::BlockCache::GetSingleton()->StartInstrumentation(
+ metric_entity(), opts_.block_cache_metrics_policy());
RETURN_NOT_OK(ThreadPoolBuilder("init").set_max_threads(1).Build(&init_pool_));
diff --git a/src/kudu/master/master_options.cc b/src/kudu/master/master_options.cc
index e07dfbb..f95d2f0 100644
--- a/src/kudu/master/master_options.cc
+++ b/src/kudu/master/master_options.cc
@@ -39,7 +39,8 @@ TAG_FLAG(master_addresses, stable);
namespace kudu {
namespace master {
-MasterOptions::MasterOptions() {
+MasterOptions::MasterOptions()
+ : block_cache_metrics_policy_(Cache::ExistingMetricsPolicy::kKeep) {
rpc_opts.default_port = Master::kDefaultPort;
if (!FLAGS_master_addresses.empty()) {
diff --git a/src/kudu/master/master_options.h b/src/kudu/master/master_options.h
index 09b7b0b..b9de110 100644
--- a/src/kudu/master/master_options.h
+++ b/src/kudu/master/master_options.h
@@ -20,6 +20,7 @@
#include <vector>
#include "kudu/kserver/kserver_options.h"
+#include "kudu/util/cache.h"
#include "kudu/util/net/net_util.h"
#include "kudu/util/status.h"
@@ -55,8 +56,17 @@ struct MasterOptions : public kserver::KuduServerOptions {
master_addresses_ = std::move(addresses);
}
+ void set_block_cache_metrics_policy(Cache::ExistingMetricsPolicy b) {
+ block_cache_metrics_policy_ = b;
+ }
+
+ Cache::ExistingMetricsPolicy block_cache_metrics_policy() const {
+ return block_cache_metrics_policy_;
+ }
+
private:
std::vector<HostPort> master_addresses_;
+ Cache::ExistingMetricsPolicy block_cache_metrics_policy_;
};
} // namespace master
diff --git a/src/kudu/master/table_locations_cache.cc b/src/kudu/master/table_locations_cache.cc
index a903889..7c82bf4 100644
--- a/src/kudu/master/table_locations_cache.cc
+++ b/src/kudu/master/table_locations_cache.cc
@@ -115,7 +115,7 @@ void TableLocationsCache::Remove(const std::string& table_id) {
// Set metrics for the cache.
void TableLocationsCache::SetMetrics(std::unique_ptr<CacheMetrics> metrics) {
- cache_->SetMetrics(std::move(metrics));
+ cache_->SetMetrics(std::move(metrics), Cache::ExistingMetricsPolicy::kKeep);
}
TableLocationsCache::EntryHandle::EntryHandle()
diff --git a/src/kudu/util/cache-test.cc b/src/kudu/util/cache-test.cc
index 88368cb..e96dda7 100644
--- a/src/kudu/util/cache-test.cc
+++ b/src/kudu/util/cache-test.cc
@@ -163,7 +163,7 @@ class CacheBaseTest : public KuduTest,
scoped_refptr<MetricEntity> entity = METRIC_ENTITY_server.Instantiate(
&metric_registry_, "test");
unique_ptr<BlockCacheMetrics> metrics(new BlockCacheMetrics(entity));
- cache_->SetMetrics(std::move(metrics));
+ cache_->SetMetrics(std::move(metrics), Cache::ExistingMetricsPolicy::kKeep);
}
}
diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc
index 073fa51..72c69bc 100644
--- a/src/kudu/util/cache.cc
+++ b/src/kudu/util/cache.cc
@@ -567,14 +567,16 @@ class ShardedCache : public Cache {
STLDeleteElements(&shards_);
}
- void SetMetrics(std::unique_ptr<CacheMetrics> metrics) override {
- // TODO(KUDU-2165): reuse of the Cache singleton across multiple MiniCluster servers
- // causes TSAN errors. So, we'll ensure that metrics only get attached once, from
- // whichever server starts first. This has the downside that, in test builds, we won't
- // get accurate cache metrics, but that's probably better than spurious failures.
+ void SetMetrics(std::unique_ptr<CacheMetrics> metrics,
+ ExistingMetricsPolicy metrics_policy) override {
std::lock_guard<decltype(metrics_lock_)> l(metrics_lock_);
- if (metrics_) {
- CHECK(IsGTest()) << "Metrics should only be set once per Cache singleton";
+ if (metrics_ && metrics_policy == ExistingMetricsPolicy::kKeep) {
+ // KUDU-2165: reuse of the Cache singleton across multiple InternalMiniCluster
+ // servers causes TSAN errors. So, we'll ensure that metrics only get
+ // attached once, from whichever server starts first. This has the downside
+ // that, in test builds, we won't get accurate cache metrics, but that's
+ // probably better than spurious failures.
+ CHECK(IsGTest()) << "Metrics should only be set once per Cache";
return;
}
metrics_ = std::move(metrics);
diff --git a/src/kudu/util/cache.h b/src/kudu/util/cache.h
index ff3a309..7a2d785 100644
--- a/src/kudu/util/cache.h
+++ b/src/kudu/util/cache.h
@@ -65,8 +65,24 @@ class Cache {
// function that was passed to the constructor.
virtual ~Cache();
+ // The behavior when calling SetMetrics() when 'metrics_' is already set.
+ enum class ExistingMetricsPolicy {
+ // Calling SetMetrics() again will be a no-op. This is appropriate in tests
+ // that use a singleton cache that is shared across multiple daemons in the
+ // same process, at the cost of not having accurate cache metrics. This is
+ // useful for avoiding races between the destruction of existing metrics
+ // and the setting of new metrics in new daemons. It is expected that this
+ // is only used in tests.
+ kKeep,
+
+ // SetMetrics() will overwrite the existing metrics. It is up to callers to
+ // ensure this is safe, e.g. by destructing the entity that owned the
+ // original metrics.
+ kReset,
+ };
// Set the cache metrics to update corresponding counters accordingly.
- virtual void SetMetrics(std::unique_ptr<CacheMetrics> metrics) = 0;
+ virtual void SetMetrics(std::unique_ptr<CacheMetrics> metrics,
+ ExistingMetricsPolicy metrics_policy) = 0;
// Opaque handle to an entry stored in the cache.
struct Handle { };
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index c992f2f..6c02467 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -472,7 +472,7 @@ FileCache::FileCache(const string& cache_name,
running_(1) {
if (entity) {
unique_ptr<FileCacheMetrics> metrics(new FileCacheMetrics(entity));
- cache_->SetMetrics(std::move(metrics));
+ cache_->SetMetrics(std::move(metrics), Cache::ExistingMetricsPolicy::kKeep);
}
LOG(INFO) << Substitute("Constructed file cache $0 with capacity $1",
cache_name, max_open_files);
diff --git a/src/kudu/util/nvm_cache.cc b/src/kudu/util/nvm_cache.cc
index e4f0508..dc7d290 100644
--- a/src/kudu/util/nvm_cache.cc
+++ b/src/kudu/util/nvm_cache.cc
@@ -51,6 +51,7 @@
#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"
+#include "kudu/util/test_util_prod.h"
#ifndef MEMKIND_PMEM_MIN_SIZE
#define MEMKIND_PMEM_MIN_SIZE (1024 * 1024 * 16) // Taken from memkind 1.9.0.
@@ -656,7 +657,12 @@ class ShardedLRUCache : public Cache {
return reinterpret_cast<LRUHandle*>(handle->get())->val_ptr();
}
- virtual void SetMetrics(unique_ptr<CacheMetrics> metrics) OVERRIDE {
+ virtual void SetMetrics(unique_ptr<CacheMetrics> metrics,
+ Cache::ExistingMetricsPolicy metrics_policy) OVERRIDE {
+ if (metrics_ && metrics_policy == Cache::ExistingMetricsPolicy::kKeep) {
+ CHECK(IsGTest()) << "Metrics should only be set once per Cache";
+ return;
+ }
metrics_ = std::move(metrics);
for (const auto& shard : shards_) {
shard->SetMetrics(metrics_.get());
diff --git a/src/kudu/util/ttl_cache.h b/src/kudu/util/ttl_cache.h
index 1a0c3ee..5ee6233 100644
--- a/src/kudu/util/ttl_cache.h
+++ b/src/kudu/util/ttl_cache.h
@@ -209,7 +209,7 @@ class TTLCache {
void SetMetrics(std::unique_ptr<TTLCacheMetrics> metrics) {
// Keep a copy of the pointer to the metrics: the FIFO cache is the owner.
metrics_ = metrics.get();
- cache_->SetMetrics(std::move(metrics));
+ cache_->SetMetrics(std::move(metrics), Cache::ExistingMetricsPolicy::kKeep);
}
private: