Repository: kudu
Updated Branches:
  refs/heads/branch-1.5.x 82bcc8c96 -> 74a922d39


KUDU-2113 Segfault because of consensus conflict and missing tablet server

Sometimes the master doesn't report all tablet servers. This can
happen, for example, when the master is starting. When this
happened and there was also a consensus conflict, it caused a
segfault. This fixes the segfault and contains a regression
test.

Change-Id: If80dbe687e70bd21de9dba9e81d71d66a5bd75e0
Reviewed-on: http://gerrit.cloudera.org:8080/7864
Reviewed-by: Adar Dembo <a...@cloudera.com>
Tested-by: Will Berkeley <wdberke...@gmail.com>
(cherry picked from commit b54eab1f31dd2180e103f1cddbaa2194396396c5)
Reviewed-on: http://gerrit.cloudera.org:8080/10073
Reviewed-by: Will Berkeley <wdberke...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/74a922d3
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/74a922d3
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/74a922d3

Branch: refs/heads/branch-1.5.x
Commit: 74a922d39b8fad6bea4375ba7253f7a6e482adbc
Parents: 82bcc8c
Author: Will Berkeley <wdberke...@apache.org>
Authored: Mon Aug 28 16:26:21 2017 -0700
Committer: Will Berkeley <wdberke...@gmail.com>
Committed: Mon Apr 16 16:02:47 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/ksck-test.cc | 38 +++++++++++++++++++++++++++++++++++---
 src/kudu/tools/ksck.cc      | 20 ++++++++++----------
 src/kudu/tools/ksck.h       |  5 +++--
 3 files changed, 48 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/74a922d3/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 22caab5..7031ec6 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -543,12 +543,12 @@ TEST_F(KsckTest, TestTabletNotRunning) {
 }
 
 // Test for a bug where we weren't properly handling a tserver not reported by 
the master.
-TEST_F(KsckTest, TestMissingTserver) {
+TEST_F(KsckTest, TestMasterNotReportingTabletServer) {
   CreateOneSmallReplicatedTable();
 
   // Delete a tablet server from the master's list. This simulates a situation
-  // where the master is starting and hasn't listed all tablet servers yet, but
-  // tablets from other tablet servers are listing the missing tablet server 
as a peer.
+  // where the master is starting and doesn't list all tablet servers yet, but
+  // tablets from other tablet servers are listing a missing tablet server as 
a peer.
   EraseKeyReturnValuePtr(&master_->tablet_servers_, "ts-id-0");
   Status s = RunKsck();
   ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
@@ -560,5 +560,37 @@ TEST_F(KsckTest, TestMissingTserver) {
       " test | UNDER-REPLICATED | 3             | 0       | 3                | 
0");
 }
 
+// KUDU-2113: Test for a bug where we weren't properly handling a tserver not
+// reported by the master when there was also a consensus conflict.
+TEST_F(KsckTest, TestMasterNotReportingTabletServerWithConsensusConflict) {
+  CreateOneSmallReplicatedTable();
+
+  // Delete a tablet server from the cluster's list as in 
TestMasterNotReportingTabletServer.
+  EraseKeyReturnValuePtr(&master_->tablet_servers_, "ts-id-0");
+
+  // Now engineer a consensus conflict.
+  const shared_ptr<KsckTabletServer>& ts = FindOrDie(master_->tablet_servers_, 
"ts-id-1");
+  auto& cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map_,
+                                  std::make_pair("ts-id-1", "tablet-id-1"));
+  cstate.set_leader_uuid("ts-id-1");
+
+  Status s = RunKsck();
+  ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
+  ASSERT_STR_CONTAINS(err_stream_.str(), "Table test has 3 under-replicated 
tablet(s)");
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+      "The consensus matrix is:\n"
+      " Config source |         Voters         | Current term | Config index | 
Committed?\n"
+      
"---------------+------------------------+--------------+--------------+------------\n"
+      " master        | A*  B   C              |              |              | 
Yes\n"
+      " A             | [config not available] |              |              | 
\n"
+      " B             | A   B*  C              | 0            |              | 
Yes\n"
+      " C             | A*  B   C              | 0            |              | 
Yes");
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+      "Table Summary\n"
+      " Name |      Status      | Total Tablets | Healthy | Under-replicated | 
Unavailable\n"
+      
"------+------------------+---------------+---------+------------------+-------------\n"
+      " test | UNDER-REPLICATED | 3             | 0       | 3                | 
0");
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/74a922d3/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 957c39f..eb376c4 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -884,23 +884,23 @@ Ksck::CheckResult Ksck::VerifyTablet(const 
shared_ptr<KsckTablet>& tablet, int t
     vector<string> committed{"Yes"};
 
     // Fill out the columns with info from the replicas.
-    for (const auto& replica : replica_infos) {
-      char label = FindOrDie(peer_uuid_mapping, replica.ts->uuid());
+    for (const auto& replica_info : replica_infos) {
+      char label = FindOrDie(peer_uuid_mapping, 
replica_info.replica->ts_uuid());
       sources.emplace_back(1, label);
-      if (!replica.consensus_state) {
+      if (!replica_info.consensus_state) {
         voters.emplace_back("[config not available]");
         terms.emplace_back("");
         indexes.emplace_back("");
         committed.emplace_back("");
         continue;
       }
-      voters.push_back(format_peers(peer_uuid_mapping, 
replica.consensus_state.get()));
-      terms.push_back(replica.consensus_state->term ?
-                      std::to_string(replica.consensus_state->term.get()) : 
"");
-      indexes.push_back(replica.consensus_state->opid_index ?
-                        
std::to_string(replica.consensus_state->opid_index.get()) : "");
-      committed.emplace_back(replica.consensus_state->type == 
KsckConsensusConfigType::PENDING ?
-                          "No" : "Yes");
+      voters.push_back(format_peers(peer_uuid_mapping, 
replica_info.consensus_state.get()));
+      terms.push_back(replica_info.consensus_state->term ?
+                      std::to_string(replica_info.consensus_state->term.get()) 
: "");
+      indexes.push_back(replica_info.consensus_state->opid_index ?
+                        
std::to_string(replica_info.consensus_state->opid_index.get()) : "");
+      committed.emplace_back(
+          replica_info.consensus_state->type == 
KsckConsensusConfigType::PENDING ? "No" : "Yes");
     }
     table.AddColumn("Config source", std::move(sources));
     table.AddColumn("Voters", std::move(voters));

http://git-wip-us.apache.org/repos/asf/kudu/blob/74a922d3/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index 8f541bb..c0cc81a 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -290,10 +290,11 @@ class KsckTabletServer {
 
  protected:
   friend class KsckTest;
-  FRIEND_TEST(KsckTest, TestMismatchedAssignments);
   FRIEND_TEST(KsckTest, TestConsensusConflictExtraPeer);
-  FRIEND_TEST(KsckTest, TestConsensusConflictMissingPeer);
   FRIEND_TEST(KsckTest, TestConsensusConflictDifferentLeader);
+  FRIEND_TEST(KsckTest, TestConsensusConflictMissingPeer);
+  FRIEND_TEST(KsckTest, 
TestMasterNotReportingTabletServerWithConsensusConflict);
+  FRIEND_TEST(KsckTest, TestMismatchedAssignments);
 
   enum State {
     kUninitialized,

Reply via email to