KUDU-2309: /masters can show the wrong list of masters The ListMasters RPC (on which the /masters page is based) returned masters based on the -master_addresses flag. It should return masters based on the consensus configuration. Since we verify the two sets of addresses are the same at startup, this usually makes no difference. However, if a master is replaced by a server with a new UUID, previously we would report the new UUID, even though it may not be part of the previously established quorum. This patch adds an additional check that the UUIDs match, and returns an error along with the registration information if there is a mismatch.
Screenshot for down master: https://github.com/wdberkeley/kudu/blob/kudu2309screenshots/www/mastersonedown.png Screenshot for UUID mismatch: https://github.com/wdberkeley/kudu/blob/kudu2309screenshots/www/mastersmismatch.png Change-Id: I603ebc22e998bac9bd00edc939577ae339587f26 Reviewed-on: http://gerrit.cloudera.org:8080/9378 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/13fc0666 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/13fc0666 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/13fc0666 Branch: refs/heads/master Commit: 13fc0666d6eecff021ca92348644d674c994cee1 Parents: dbcd621 Author: Will Berkeley <[email protected]> Authored: Wed Feb 21 04:21:47 2018 -0800 Committer: Will Berkeley <[email protected]> Committed: Tue Mar 6 22:32:49 2018 +0000 ---------------------------------------------------------------------- src/kudu/master/master.cc | 44 ++++++++++++++++------------ src/kudu/master/master_path_handlers.cc | 10 +++++-- src/kudu/master/sys_catalog.h | 2 +- www/masters.mustache | 24 +++++++++++---- 4 files changed, 53 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/master.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc index 5aff500..d198777 100644 --- a/src/kudu/master/master.cc +++ b/src/kudu/master/master.cc @@ -18,10 +18,8 @@ #include "kudu/master/master.h" #include <algorithm> -#include <iterator> #include <memory> #include <ostream> -#include <set> #include <string> #include <type_traits> #include <vector> @@ -38,7 +36,6 @@ #include "kudu/fs/fs_manager.h" #include "kudu/gutil/bind.h" #include "kudu/gutil/bind_helpers.h" -#include "kudu/gutil/map-util.h" #include "kudu/gutil/move.h" #include "kudu/gutil/ref_counted.h" #include "kudu/gutil/strings/substitute.h" @@ -308,34 +305,46 @@ Status Master::ListMasters(std::vector<ServerEntryPB>* masters) const { local_entry.mutable_instance_id()->CopyFrom(catalog_manager_->NodeInstance()); RETURN_NOT_OK(GetMasterRegistration(local_entry.mutable_registration())); local_entry.set_role(RaftPeerPB::LEADER); - masters->push_back(local_entry); + masters->emplace_back(std::move(local_entry)); return Status::OK(); } - // Since --master_addresses may contain duplicates, including different names - // for the same server, we deduplicate the masters by UUID here. - auto uuid_cmp = [](const ServerEntryPB& left, const ServerEntryPB& right) { - return left.instance_id().permanent_uuid() < right.instance_id().permanent_uuid(); - }; - std::set<ServerEntryPB, decltype(uuid_cmp)> masters_by_uuid(uuid_cmp); - for (const HostPort& peer_addr : opts_.master_addresses) { + RETURN_NOT_OK(catalog_manager_->CheckOnline()); + auto consensus = catalog_manager_->sys_catalog()->tablet_replica()->shared_consensus(); + if (!consensus) { + return Status::IllegalState("consensus not running"); + } + const auto config = consensus->CommittedConfig(); + + masters->clear(); + for (const auto& peer : config.peers()) { ServerEntryPB peer_entry; - Status s = GetMasterEntryForHost(messenger_, peer_addr, &peer_entry); + HostPort hp; + Status s = HostPortFromPB(peer.last_known_addr(), &hp).AndThen([&] { + return GetMasterEntryForHost(messenger_, hp, &peer_entry); + }); if (!s.ok()) { s = s.CloneAndPrepend( - Substitute("Unable to get registration information for peer ($0)", - peer_addr.ToString())); + Substitute("Unable to get registration information for peer $0 ($1)", + peer.permanent_uuid(), + hp.ToString())); LOG(WARNING) << s.ToString(); StatusToPB(s, peer_entry.mutable_error()); + } else if (peer_entry.instance_id().permanent_uuid() != peer.permanent_uuid()) { + StatusToPB(Status::IllegalState( + Substitute("mismatched UUIDs: expected UUID $0 from master at $1, but got UUID $2", + peer.permanent_uuid(), + hp.ToString(), + peer_entry.instance_id().permanent_uuid())), + peer_entry.mutable_error()); } - InsertIfNotPresent(&masters_by_uuid, peer_entry); + masters->emplace_back(std::move(peer_entry)); } - - std::copy(masters_by_uuid.begin(), masters_by_uuid.end(), std::back_inserter(*masters)); return Status::OK(); } Status Master::GetMasterHostPorts(std::vector<HostPortPB>* hostports) const { + RETURN_NOT_OK(catalog_manager_->CheckOnline()); auto consensus = catalog_manager_->sys_catalog()->tablet_replica()->shared_consensus(); if (!consensus) { return Status::IllegalState("consensus not running"); @@ -360,6 +369,5 @@ Status Master::GetMasterHostPorts(std::vector<HostPortPB>* hostports) const { return Status::OK(); } - } // namespace master } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/master_path_handlers.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/master_path_handlers.cc b/src/kudu/master/master_path_handlers.cc index 15da46f..ae9de5c 100644 --- a/src/kudu/master/master_path_handlers.cc +++ b/src/kudu/master/master_path_handlers.cc @@ -389,13 +389,19 @@ void MasterPathHandlers::HandleMasters(const Webserver::WebRequest& /*req*/, } output->Set("even_masters", masters.size() % 2 == 0); output->Set("masters", EasyJson::kArray); + output->Set("errors", EasyJson::kArray); + output->Set("has_no_errors", true); for (const ServerEntryPB& master : masters) { - EasyJson master_json = (*output)["masters"].PushBack(EasyJson::kObject); if (master.has_error()) { + output->Set("has_no_errors", false); + EasyJson error_json = (*output)["errors"].PushBack(EasyJson::kObject); Status error = StatusFromPB(master.error()); - master_json["error"] = error.ToString(); + error_json["uuid"] = master.has_instance_id() ? + master.instance_id().permanent_uuid() : "Unavailable"; + error_json["error"] = error.ToString(); continue; } + EasyJson master_json = (*output)["masters"].PushBack(EasyJson::kObject); const ServerRegistrationPB& reg = master.registration(); master_json["uuid"] = master.instance_id().permanent_uuid(); if (!reg.http_addresses().empty()) { http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/sys_catalog.h ---------------------------------------------------------------------- diff --git a/src/kudu/master/sys_catalog.h b/src/kudu/master/sys_catalog.h index 49ae07f..8aa92eb 100644 --- a/src/kudu/master/sys_catalog.h +++ b/src/kudu/master/sys_catalog.h @@ -198,7 +198,7 @@ class SysCatalogTable { // NOTE: This is the "server-side" schema, so it must have the column IDs. Schema BuildTableSchema(); - // Returns 'Status::OK()' if the WriteTranasction completed + // Returns 'Status::OK()' if the WriteTransaction completed Status SyncWrite(const tserver::WriteRequestPB *req, tserver::WriteResponsePB *resp); void SysCatalogStateChanged(const std::string& tablet_id, const std::string& reason); http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/www/masters.mustache ---------------------------------------------------------------------- diff --git a/www/masters.mustache b/www/masters.mustache index 0b7a6b2..0ce7649 100644 --- a/www/masters.mustache +++ b/www/masters.mustache @@ -27,7 +27,7 @@ under the License. </div> {{/even_masters}} {{^error}} - <h2>Masters</h2> + <h2>Live Masters</h2> <table class='table table-striped'> <thead><tr> <th>UUID</th> @@ -37,16 +37,28 @@ under the License. <tbody> {{#masters}} <tr> - {{#error}} - <td colspan=2><b style="color:red">{{error}}</b></td> - {{/error}} - {{^error}} <td>{{#target}}<a href="{{.}}">{{/target}}{{uuid}}{{#target}}</a>{{/target}}</td> <td>{{role}}</td> <td><pre><code>{{registration}}</code></pre></td> - {{/error}} </tr> {{/masters}} </tbody> </table> + {{^has_no_errors}} + <h2>Errors</h2> + <table class='table table-striped'> + <thead><tr> + <th>UUID</th> + <th>Error</th> + </tr></thead> + <tbody> + {{#errors}} + <tr> + <td>{{uuid}}</td> + <td><pre><code>{{error}}</code></pre></td> + </tr> + {{/errors}} + </tbody> + </table> + {{/has_no_errors}} {{/error}}
