Repository: kudu Updated Branches: refs/heads/master b7d2780ff -> 11ad075a1
KUDU-1048 master should show versions of tservers, version summary This patch adds a version summary table and a total count of registered tablet servers to /tablet-servers. It also fixes the display of the registration, which was printing in red font. Sample: https://raw.githubusercontent.com/wdberkeley/kudu-cr/master/KUDU-1048-improved.png Change-Id: Idd203209e3d99292018801b94ec2904b6634854f Reviewed-on: http://gerrit.cloudera.org:8080/4104 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/11ad075a Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/11ad075a Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/11ad075a Branch: refs/heads/master Commit: 11ad075a113dc4c63e3074b58667b587e7308a06 Parents: b7d2780 Author: Will Berkeley <[email protected]> Authored: Sat Aug 20 12:27:50 2016 -0400 Committer: Todd Lipcon <[email protected]> Committed: Mon Aug 29 22:27:43 2016 +0000 ---------------------------------------------------------------------- src/kudu/master/master-path-handlers.cc | 37 +++++++++++++++++++++++----- src/kudu/master/ts_descriptor.cc | 11 +++++++++ src/kudu/master/ts_descriptor.h | 3 +++ src/kudu/master/ts_manager.cc | 9 +------ 4 files changed, 46 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/11ad075a/src/kudu/master/master-path-handlers.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master-path-handlers.cc b/src/kudu/master/master-path-handlers.cc index e45ad0e..2eb6120 100644 --- a/src/kudu/master/master-path-handlers.cc +++ b/src/kudu/master/master-path-handlers.cc @@ -17,6 +17,7 @@ #include "kudu/master/master-path-handlers.h" +#include <array> #include <algorithm> #include <boost/bind.hpp> #include <map> @@ -46,8 +47,10 @@ namespace kudu { +using 
std::array; using consensus::ConsensusStatePB; using consensus::RaftPeerPB; +using std::map; using std::pair; using std::shared_ptr; using std::string; @@ -66,18 +69,40 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& req, master_->ts_manager()->GetAllDescriptors(&descs); *output << "<h1>Tablet Servers</h1>\n"; + *output << Substitute("<p>There are $0 registered tablet servers.</p>", descs.size()); - *output << "<table class='table table-striped'>\n"; - *output << " <tr><th>UUID</th><th>Time since heartbeat</th><th>Registration</th></tr>\n"; + map<string, array<int, 2>> version_counts; + vector<string> tserver_rows; for (const std::shared_ptr<TSDescriptor>& desc : descs) { const string time_since_hb = StringPrintf("%.1fs", desc->TimeSinceHeartbeat().ToSeconds()); ServerRegistrationPB reg; desc->GetRegistration(&reg); - *output << Substitute("<tr><th>$0</th><td>$1</td><td><code>$2</code></td></tr>\n", - RegistrationToHtml(reg, desc->permanent_uuid()), - time_since_hb, - EscapeForHtmlToString(reg.ShortDebugString())); + + if (desc->PresumedDead()) { + version_counts[reg.software_version()][1]++; + } else { + version_counts[reg.software_version()][0]++; + } + string row = Substitute("<tr><th>$0</th><td>$1</td><td><pre><code>$2</code></pre></td></tr>\n", + RegistrationToHtml(reg, desc->permanent_uuid()), + time_since_hb, + EscapeForHtmlToString(reg.ShortDebugString())); + tserver_rows.push_back(row); + } + + *output << "<h3>Version Summary</h3>"; + *output << "<table class='table table-striped'>\n"; + *output << "<tr><th>Version</th><th>Count (Live)</th><th>Count (Dead)</th></tr>\n"; + for (const auto& entry : version_counts) { + *output << Substitute("<tr><td>$0</td><td>$1</td><td>$2</td></tr>\n", + entry.first, entry.second[0], entry.second[1]); } + *output << "</table>"; + + *output << "<h3>Registrations</h3>"; + *output << "<table class='table table-striped'>\n"; + *output << "<tr><th>UUID</th><th>Time since 
heartbeat</th><th>Registration</th></tr>\n"; + *output << JoinStrings(tserver_rows, "\n"); *output << "</table>\n"; } http://git-wip-us.apache.org/repos/asf/kudu/blob/11ad075a/src/kudu/master/ts_descriptor.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc index dd28cb2..f3f4633 100644 --- a/src/kudu/master/ts_descriptor.cc +++ b/src/kudu/master/ts_descriptor.cc @@ -28,6 +28,13 @@ #include "kudu/master/master.pb.h" #include "kudu/tserver/tserver_admin.proxy.h" #include "kudu/util/net/net_util.h" +#include "kudu/util/flag_tags.h" + +DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000, + "The period of time that a Master can go without receiving a heartbeat from a " + "tablet server before considering it unresponsive. Unresponsive servers are not " + "selected when assigning replicas during table creation or re-replication."); +TAG_FLAG(tserver_unresponsive_timeout_ms, advanced); using std::make_shared; using std::shared_ptr; @@ -136,6 +143,10 @@ MonoDelta TSDescriptor::TimeSinceHeartbeat() const { return now - last_heartbeat_; } +bool TSDescriptor::PresumedDead() const { + return TimeSinceHeartbeat().ToMilliseconds() >= FLAGS_tserver_unresponsive_timeout_ms; +} + int64_t TSDescriptor::latest_seqno() const { std::lock_guard<simple_spinlock> l(lock_); return latest_seqno_; http://git-wip-us.apache.org/repos/asf/kudu/blob/11ad075a/src/kudu/master/ts_descriptor.h ---------------------------------------------------------------------- diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h index 86a953e..701acd1 100644 --- a/src/kudu/master/ts_descriptor.h +++ b/src/kudu/master/ts_descriptor.h @@ -66,6 +66,9 @@ class TSDescriptor { // from this TS. MonoDelta TimeSinceHeartbeat() const; + // Return whether this server is presumed dead based on last heartbeat time. + bool PresumedDead() const; + // Register this tablet server. 
Status Register(const NodeInstancePB& instance, const ServerRegistrationPB& registration); http://git-wip-us.apache.org/repos/asf/kudu/blob/11ad075a/src/kudu/master/ts_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index dd95cc5..fd1b36d 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -24,13 +24,6 @@ #include "kudu/gutil/strings/substitute.h" #include "kudu/master/master.pb.h" #include "kudu/master/ts_descriptor.h" -#include "kudu/util/flag_tags.h" - -DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000, - "The period of time that a Master can go without receiving a heartbeat from a " - "tablet server before considering it unresponsive. Unresponsive servers are not " - "selected when assigning replicas during table creation or re-replication."); -TAG_FLAG(tserver_unresponsive_timeout_ms, advanced); using std::shared_ptr; using std::string; @@ -107,7 +100,7 @@ void TSManager::GetAllLiveDescriptors(vector<shared_ptr<TSDescriptor> > *descs) descs->reserve(servers_by_id_.size()); for (const TSDescriptorMap::value_type& entry : servers_by_id_) { const shared_ptr<TSDescriptor>& ts = entry.second; - if (ts->TimeSinceHeartbeat().ToMilliseconds() < FLAGS_tserver_unresponsive_timeout_ms) { + if (!ts->PresumedDead()) { descs->push_back(ts); } }
