KUDU-2309: /masters can show the wrong list of masters

The ListMasters RPC (on which the /masters page is based) returned
masters based on the -master_addresses flag. It should return masters
based on the consensus configuration. Since we verify the two sets of
addresses are the same at startup, this usually makes no difference.
However, if a master is replaced by a server with a new UUID,
previously we would report the new UUID, even though it may not be
part of the previously established quorum. This patch adds an
additional check that the UUIDs match, and returns an error along with
the registration information if there is a mismatch.

Screenshot for down master: 
https://github.com/wdberkeley/kudu/blob/kudu2309screenshots/www/mastersonedown.png
Screenshot for UUID mismatch: 
https://github.com/wdberkeley/kudu/blob/kudu2309screenshots/www/mastersmismatch.png

Change-Id: I603ebc22e998bac9bd00edc939577ae339587f26
Reviewed-on: http://gerrit.cloudera.org:8080/9378
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/13fc0666
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/13fc0666
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/13fc0666

Branch: refs/heads/master
Commit: 13fc0666d6eecff021ca92348644d674c994cee1
Parents: dbcd621
Author: Will Berkeley <wdberke...@apache.org>
Authored: Wed Feb 21 04:21:47 2018 -0800
Committer: Will Berkeley <wdberke...@gmail.com>
Committed: Tue Mar 6 22:32:49 2018 +0000

----------------------------------------------------------------------
 src/kudu/master/master.cc               | 44 ++++++++++++++++------------
 src/kudu/master/master_path_handlers.cc | 10 +++++--
 src/kudu/master/sys_catalog.h           |  2 +-
 www/masters.mustache                    | 24 +++++++++++----
 4 files changed, 53 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/master.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc
index 5aff500..d198777 100644
--- a/src/kudu/master/master.cc
+++ b/src/kudu/master/master.cc
@@ -18,10 +18,8 @@
 #include "kudu/master/master.h"
 
 #include <algorithm>
-#include <iterator>
 #include <memory>
 #include <ostream>
-#include <set>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -38,7 +36,6 @@
 #include "kudu/fs/fs_manager.h"
 #include "kudu/gutil/bind.h"
 #include "kudu/gutil/bind_helpers.h"
-#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/move.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -308,34 +305,46 @@ Status Master::ListMasters(std::vector<ServerEntryPB>* 
masters) const {
     
local_entry.mutable_instance_id()->CopyFrom(catalog_manager_->NodeInstance());
     RETURN_NOT_OK(GetMasterRegistration(local_entry.mutable_registration()));
     local_entry.set_role(RaftPeerPB::LEADER);
-    masters->push_back(local_entry);
+    masters->emplace_back(std::move(local_entry));
     return Status::OK();
   }
 
-  // Since --master_addresses may contain duplicates, including different names
-  // for the same server, we deduplicate the masters by UUID here.
-  auto uuid_cmp = [](const ServerEntryPB& left, const ServerEntryPB& right) {
-    return left.instance_id().permanent_uuid() < 
right.instance_id().permanent_uuid();
-  };
-  std::set<ServerEntryPB, decltype(uuid_cmp)> masters_by_uuid(uuid_cmp);
-  for (const HostPort& peer_addr : opts_.master_addresses) {
+  RETURN_NOT_OK(catalog_manager_->CheckOnline());
+  auto consensus = 
catalog_manager_->sys_catalog()->tablet_replica()->shared_consensus();
+  if (!consensus) {
+    return Status::IllegalState("consensus not running");
+  }
+  const auto config = consensus->CommittedConfig();
+
+  masters->clear();
+  for (const auto& peer : config.peers()) {
     ServerEntryPB peer_entry;
-    Status s = GetMasterEntryForHost(messenger_, peer_addr, &peer_entry);
+    HostPort hp;
+    Status s = HostPortFromPB(peer.last_known_addr(), &hp).AndThen([&] {
+      return GetMasterEntryForHost(messenger_, hp, &peer_entry);
+    });
     if (!s.ok()) {
       s = s.CloneAndPrepend(
-          Substitute("Unable to get registration information for peer ($0)",
-                     peer_addr.ToString()));
+          Substitute("Unable to get registration information for peer $0 ($1)",
+                     peer.permanent_uuid(),
+                     hp.ToString()));
       LOG(WARNING) << s.ToString();
       StatusToPB(s, peer_entry.mutable_error());
+    } else if (peer_entry.instance_id().permanent_uuid() != 
peer.permanent_uuid()) {
+      StatusToPB(Status::IllegalState(
+          Substitute("mismatched UUIDs: expected UUID $0 from master at $1, 
but got UUID $2",
+                     peer.permanent_uuid(),
+                     hp.ToString(),
+                     peer_entry.instance_id().permanent_uuid())),
+                 peer_entry.mutable_error());
     }
-    InsertIfNotPresent(&masters_by_uuid, peer_entry);
+    masters->emplace_back(std::move(peer_entry));
   }
-
-  std::copy(masters_by_uuid.begin(), masters_by_uuid.end(), 
std::back_inserter(*masters));
   return Status::OK();
 }
 
 Status Master::GetMasterHostPorts(std::vector<HostPortPB>* hostports) const {
+  RETURN_NOT_OK(catalog_manager_->CheckOnline());
   auto consensus = 
catalog_manager_->sys_catalog()->tablet_replica()->shared_consensus();
   if (!consensus) {
     return Status::IllegalState("consensus not running");
@@ -360,6 +369,5 @@ Status Master::GetMasterHostPorts(std::vector<HostPortPB>* 
hostports) const {
   return Status::OK();
 }
 
-
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/master_path_handlers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master_path_handlers.cc 
b/src/kudu/master/master_path_handlers.cc
index 15da46f..ae9de5c 100644
--- a/src/kudu/master/master_path_handlers.cc
+++ b/src/kudu/master/master_path_handlers.cc
@@ -389,13 +389,19 @@ void MasterPathHandlers::HandleMasters(const 
Webserver::WebRequest& /*req*/,
   }
   output->Set("even_masters", masters.size() % 2 == 0);
   output->Set("masters", EasyJson::kArray);
+  output->Set("errors", EasyJson::kArray);
+  output->Set("has_no_errors", true);
   for (const ServerEntryPB& master : masters) {
-    EasyJson master_json = (*output)["masters"].PushBack(EasyJson::kObject);
     if (master.has_error()) {
+      output->Set("has_no_errors", false);
+      EasyJson error_json = (*output)["errors"].PushBack(EasyJson::kObject);
       Status error = StatusFromPB(master.error());
-      master_json["error"] = error.ToString();
+      error_json["uuid"] = master.has_instance_id() ?
+                           master.instance_id().permanent_uuid() : 
"Unavailable";
+      error_json["error"] = error.ToString();
       continue;
     }
+    EasyJson master_json = (*output)["masters"].PushBack(EasyJson::kObject);
     const ServerRegistrationPB& reg = master.registration();
     master_json["uuid"] = master.instance_id().permanent_uuid();
     if (!reg.http_addresses().empty()) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/src/kudu/master/sys_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.h b/src/kudu/master/sys_catalog.h
index 49ae07f..8aa92eb 100644
--- a/src/kudu/master/sys_catalog.h
+++ b/src/kudu/master/sys_catalog.h
@@ -198,7 +198,7 @@ class SysCatalogTable {
   // NOTE: This is the "server-side" schema, so it must have the column IDs.
   Schema BuildTableSchema();
 
-  // Returns 'Status::OK()' if the WriteTranasction completed
+  // Returns 'Status::OK()' if the WriteTransaction completed
   Status SyncWrite(const tserver::WriteRequestPB *req, 
tserver::WriteResponsePB *resp);
 
   void SysCatalogStateChanged(const std::string& tablet_id, const std::string& 
reason);

http://git-wip-us.apache.org/repos/asf/kudu/blob/13fc0666/www/masters.mustache
----------------------------------------------------------------------
diff --git a/www/masters.mustache b/www/masters.mustache
index 0b7a6b2..0ce7649 100644
--- a/www/masters.mustache
+++ b/www/masters.mustache
@@ -27,7 +27,7 @@ under the License.
   </div>
 {{/even_masters}}
 {{^error}}
-  <h2>Masters</h2>
+  <h2>Live Masters</h2>
   <table class='table table-striped'>
     <thead><tr>
       <th>UUID</th>
@@ -37,16 +37,28 @@ under the License.
     <tbody>
     {{#masters}}
       <tr>
-        {{#error}}
-          <td colspan=2><b style="color:red">{{error}}</b></td>
-        {{/error}}
-        {{^error}}
           <td>{{#target}}<a 
href="{{.}}">{{/target}}{{uuid}}{{#target}}</a>{{/target}}</td>
           <td>{{role}}</td>
           <td><pre><code>{{registration}}</code></pre></td>
-        {{/error}}
       </tr>
     {{/masters}}
     </tbody>
   </table>
+  {{^has_no_errors}}
+  <h2>Errors</h2>
+  <table class='table table-striped'>
+    <thead><tr>
+      <th>UUID</th>
+      <th>Error</th>
+    </tr></thead>
+    <tbody>
+    {{#errors}}
+      <tr>
+        <td>{{uuid}}</td>
+        <td><pre><code>{{error}}</code></pre></td>
+      </tr>
+    {{/errors}}
+    </tbody>
+  </table>
+  {{/has_no_errors}}
 {{/error}}

Reply via email to