Repository: kudu Updated Branches: refs/heads/master 9c89c2a9a -> 29c66db48
Add a simple metric for cluster skew

This adds a very simple 'cluster_replica_skew' metric to the master that reports on the difference in number of replicas between the most and least loaded tablet servers. This information was already computable from the tablets_num_* metrics available on all the tablet servers, but this centralizes it in one place and handles counting the correct tablet states, so it's much easier to consume.

This simple metric should be useful for operators trying to set up simple alerting schemes based on cluster balance.

Why not introduce a more comprehensive set of metrics around balance? Because eventually rebalancing should be tightly integrated with the master. This metric is just meant as a useful "canary" for when the rebalancer ought to be run, until a more sophisticated and automated procedure can be put in place. At that time there will likely be better metrics exposed to gauge the balance of the cluster and the behavior of the rebalancer.

I also wrote a quick script to simulate placing replicas on tablet servers and measure the resulting distribution of skew. The results of the simulations show skew is almost certainly 6 or less when replica distribution is determined solely by the current "power of two choices" algorithm with a fixed number of tablet servers. This can provide some guide to operators looking to set a threshold for concerning skew: a value of e.g. 10 should be vanishingly unlikely to result except by some external force like unbalanced re-replication or the addition of a tablet server, so it should suffice as a threshold.
Change-Id: I107256de604998cbf9206a8fccb3a43de86f81a8 Reviewed-on: http://gerrit.cloudera.org:8080/10787 Tested-by: Will Berkeley <wdberke...@gmail.com> Reviewed-by: Alexey Serbin <aser...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/72d0981a Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/72d0981a Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/72d0981a Branch: refs/heads/master Commit: 72d0981aa5930d3e0762dad988a991b35603b2f9 Parents: 9c89c2a Author: Will Berkeley <wdberke...@apache.org> Authored: Thu Jun 21 12:21:41 2018 -0700 Committer: Will Berkeley <wdberke...@gmail.com> Committed: Wed Jun 27 18:26:52 2018 +0000 ---------------------------------------------------------------------- src/kudu/master/master.cc | 2 +- src/kudu/master/ts_manager.cc | 35 +++++++++++- src/kudu/master/ts_manager.h | 8 ++- src/kudu/scripts/max_skew_estimate.py | 87 ++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/72d0981a/src/kudu/master/master.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc index a6210e8..61efd76 100644 --- a/src/kudu/master/master.cc +++ b/src/kudu/master/master.cc @@ -118,7 +118,7 @@ GROUP_FLAG_VALIDATOR(hive_metastore_sasl_enabled, ValidateHiveMetastoreSaslEnabl Master::Master(const MasterOptions& opts) : KuduServer("Master", opts, "kudu.master"), state_(kStopped), - ts_manager_(new TSManager()), + ts_manager_(new TSManager(metric_entity_)), catalog_manager_(new CatalogManager(this)), path_handlers_(new MasterPathHandlers(this)), opts_(opts), http://git-wip-us.apache.org/repos/asf/kudu/blob/72d0981a/src/kudu/master/ts_manager.cc ---------------------------------------------------------------------- diff --git 
a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index 8b1aa1c..39e4df6 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -17,17 +17,30 @@ #include "kudu/master/ts_manager.h" +#include <algorithm> +#include <limits> #include <mutex> #include <vector> #include <glog/logging.h> #include "kudu/common/wire_protocol.pb.h" +#include "kudu/gutil/bind.h" +#include "kudu/gutil/bind_helpers.h" #include "kudu/gutil/map-util.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/ts_descriptor.h" +#include "kudu/util/metrics.h" #include "kudu/util/pb_util.h" +METRIC_DEFINE_gauge_int32(server, cluster_replica_skew, + "Cluster Replica Skew", + kudu::MetricUnit::kTablets, + "The difference between the number of replicas on " + "the tablet server hosting the most replicas and " + "the number of replicas on the tablet server hosting " + "the least replicas."); + using std::shared_ptr; using std::string; using std::vector; @@ -36,7 +49,11 @@ using strings::Substitute; namespace kudu { namespace master { -TSManager::TSManager() { +TSManager::TSManager(const scoped_refptr<MetricEntity>& metric_entity) { + METRIC_cluster_replica_skew.InstantiateFunctionGauge( + metric_entity, + Bind(&TSManager::ClusterSkew, Unretained(this))) + ->AutoDetach(&metric_detacher_); } TSManager::~TSManager() { @@ -116,6 +133,22 @@ int TSManager::GetCount() const { return servers_by_id_.size(); } +int TSManager::ClusterSkew() const { + int min_count = std::numeric_limits<int>::max(); + int max_count = 0; + shared_lock<rw_spinlock> l(lock_); + for (const TSDescriptorMap::value_type& entry : servers_by_id_) { + const shared_ptr<TSDescriptor>& ts = entry.second; + if (ts->PresumedDead()) { + continue; + } + int num_live_replicas = ts->num_live_replicas(); + min_count = std::min(min_count, num_live_replicas); + max_count = std::max(max_count, num_live_replicas); + } + return max_count - min_count; +} + } // namespace master } // namespace kudu 
http://git-wip-us.apache.org/repos/asf/kudu/blob/72d0981a/src/kudu/master/ts_manager.h ---------------------------------------------------------------------- diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h index 8327203..1e716ff 100644 --- a/src/kudu/master/ts_manager.h +++ b/src/kudu/master/ts_manager.h @@ -23,7 +23,9 @@ #include <vector> #include "kudu/gutil/macros.h" +#include "kudu/gutil/ref_counted.h" #include "kudu/util/locks.h" +#include "kudu/util/metrics.h" #include "kudu/util/status.h" namespace kudu { @@ -49,7 +51,7 @@ typedef std::vector<std::shared_ptr<TSDescriptor>> TSDescriptorVector; // This class is thread-safe. class TSManager { public: - TSManager(); + explicit TSManager(const scoped_refptr<MetricEntity>& metric_entity); virtual ~TSManager(); // Lookup the tablet server descriptor for the given instance identifier. @@ -84,8 +86,12 @@ class TSManager { int GetCount() const; private: + int ClusterSkew() const; + mutable rw_spinlock lock_; + FunctionGaugeDetacher metric_detacher_; + typedef std::unordered_map< std::string, std::shared_ptr<TSDescriptor>> TSDescriptorMap; TSDescriptorMap servers_by_id_; http://git-wip-us.apache.org/repos/asf/kudu/blob/72d0981a/src/kudu/scripts/max_skew_estimate.py ---------------------------------------------------------------------- diff --git a/src/kudu/scripts/max_skew_estimate.py b/src/kudu/scripts/max_skew_estimate.py new file mode 100755 index 0000000..841c75a --- /dev/null +++ b/src/kudu/scripts/max_skew_estimate.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# The purpose of this script is to estimate the distribution of the maximum +# skew produced by Kudu's "power of two choices" placement algorithm, +# which is used to place replicas on tablet servers (at least in Kudu <= 1.7). +import math +import random +import sys + +# Replicates Random::ReservoirSample from kudu/util/random.h. +def reservoir_sample(n, sample_size, avoid): + result = list() + k = 0 + for i in xrange(n): + if i in avoid: + continue + k += 1 + if len(result) < sample_size: + result.append(i) + continue + j = random.randrange(k) + if j < sample_size: + result[j] = i + return result + +# Follows CatalogManager::SelectReplica, which implements the power of two +# choices selection algorithm, except we assume we always have a placement. +def select_replica(num_servers, avoid, counts): + two_choices = reservoir_sample(num_servers, 2, avoid) + assert(len(two_choices) > 0) + assert(len(two_choices) <= 2) + if len(two_choices) == 1: + return two_choices[0] + else: + a, b = two_choices[0], two_choices[1] + if counts[a] < counts[b]: + return a + else: + return b + +# Quickly cribbed from https://stackoverflow.com/a/15589202. +# 'data' must be sorted. 
def percentile(data, percentile):
    """Return the nearest-rank percentile of 'data'.

    'data' must be sorted in ascending order; 'percentile' is a number on
    the 0-100 scale. Indexing an empty 'data' raises IndexError.
    """
    size = len(data)
    # Divide by 100.0 so the rank is computed in floating point: with integer
    # division the quotient is truncated before math.ceil() ever sees it.
    rank = int(math.ceil(size * percentile / 100.0))
    # A rank of 0 would index data[-1] (the largest element); clamp it so
    # that low percentiles of small samples yield the smallest element.
    return data[max(rank, 1) - 1]


def generate_max_skew(num_servers, num_tablets, rf):
    """Simulate one round of replica placement and return the resulting skew.

    For each of 'num_tablets' tablets, 'rf' replicas are placed one at a time
    using select_replica(), never reusing a server within the same tablet.
    The return value is the difference between the highest and the lowest
    per-server replica count.
    """
    counts = {i: 0 for i in range(num_servers)}
    for _ in range(num_tablets):
        # Servers already holding a replica of the current tablet.
        avoid = set()
        for _ in range(rf):
            replica = select_replica(num_servers, avoid, counts)
            avoid.add(replica)
            counts[replica] += 1
    return max(counts.values()) - min(counts.values())


def main():
    """Run the requested number of trials and print percentiles of the skew."""
    args = sys.argv
    if len(args) != 5:
        print("max_skew_estimate.py <num trials> <num servers> <num_tablets> <repl factor>")
        sys.exit(1)
    num_trials, num_servers, num_tablets, rf = [int(arg) for arg in args[1:5]]
    skews = sorted(generate_max_skew(num_servers, num_tablets, rf)
                   for _ in range(num_trials))
    for p in [5, 25, 50, 75, 99]:
        print("%02d percentile: %d" % (p, percentile(skews, p)))


if __name__ == "__main__":
    main()