This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new c5993dbf4 KUDU-3589 Wait until masters become voters
c5993dbf4 is described below
commit c5993dbf4f8e706784c690f3d5b38cad75276387
Author: Ádám Bakai <[email protected]>
AuthorDate: Mon Jul 15 12:25:04 2024 +0200
KUDU-3589 Wait until masters become voters
In SecurityITest.TestNonDefaultPrincipalMultipleMaster test, when the
leader was shut down, it was possible that some of the Raft members
are still in learner mode. This makes it impossible for the cluster to
recover and elect a new leader. To solve this test issue, there is an
explicit wait until masters become voters in the Raft consensus. This
was done by using the already existing VerifyVotersOnAllMasters
function. The function was moved from dynamic_multi_master-test to
external_mini_cluster to be usable from security-itest, too.
Change-Id: I1ea46345680ce506206e9335a147b887fc173b50
Reviewed-on: http://gerrit.cloudera.org:8080/21636
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/integration-tests/security-itest.cc | 4 +++
src/kudu/master/dynamic_multi_master-test.cc | 37 +++-----------------------
src/kudu/mini-cluster/external_mini_cluster.cc | 29 ++++++++++++++++++--
src/kudu/mini-cluster/external_mini_cluster.h | 12 +++++++++
4 files changed, 46 insertions(+), 36 deletions(-)
diff --git a/src/kudu/integration-tests/security-itest.cc
b/src/kudu/integration-tests/security-itest.cc
index 4c027cb33..b0ed90152 100644
--- a/src/kudu/integration-tests/security-itest.cc
+++ b/src/kudu/integration-tests/security-itest.cc
@@ -920,6 +920,10 @@ TEST_F(SecurityITest,
TestNonDefaultPrincipalMultipleMaster) {
ASSERT_OK(cluster_->AddMaster());
ASSERT_OK(cluster_->master(2)->WaitForCatalogManager());
+ ASSERT_EVENTUALLY([&] {
+ ASSERT_OK(cluster_->VerifyVotersOnAllMasters(3));
+ });
+
// Tablet servers need to be restarted in order to update their master list.
for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
cluster_->tablet_server(i)->Shutdown();
diff --git a/src/kudu/master/dynamic_multi_master-test.cc
b/src/kudu/master/dynamic_multi_master-test.cc
index f178a2a83..d3c9e0a55 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -203,37 +203,6 @@ Status RunListMasters(ListMastersResponsePB* resp,
ExternalMiniCluster* cluster)
return RunLeaderMasterRPC(list_masters, cluster);
}
-// Verify the 'cluster' contains 'num_masters' on all masters, and that they
-// are all voters. Returns an error if the expected state is not present.
-//
-// This should be used instead of VerifyVoterMastersForCluster() if it is
-// required that all masters have accepted the config change. E.g. tests that
-// restart a cluster after adding a master should verify that all masters agree
-// before restarting, in case lagging masters start up with stale configs.
-//
-// TODO(awong): we should be more robust to starting up with mismatched on-disk
-// configs, if we can help it.
-Status VerifyVotersOnAllMasters(int num_masters, ExternalMiniCluster* cluster)
{
- for (int i = 0; i < cluster->num_masters(); i++) {
- ListMastersResponsePB resp;
- ListMastersRequestPB req;
- RpcController rpc;
- RETURN_NOT_OK(cluster->master_proxy(i)->ListMasters(req, &resp, &rpc));
- if (num_masters != resp.masters_size()) {
- return Status::IllegalState(Substitute("expected $0 masters but got $1",
- num_masters,
resp.masters_size()));
- }
- for (const auto& master : resp.masters()) {
- if ((master.role() != RaftPeerPB::LEADER && master.role() !=
RaftPeerPB::FOLLOWER) ||
- master.member_type() != RaftPeerPB::VOTER ||
- master.registration().rpc_addresses_size() != 1) {
- return Status::IllegalState(Substitute("bad master: $0",
SecureShortDebugString(master)));
- }
- }
- }
- return Status::OK();
-}
-
// Verify the ExternalMiniCluster 'cluster' contains 'num_masters' overall and
// are all VOTERS. Populates the new master addresses in 'master_hps', if not
// nullptr. Returns an error if the expected state is not present.
@@ -1709,7 +1678,7 @@ TEST_F(AutoAddMasterTest, TestAddWithOnGoingDdl) {
ASSERT_OK(new_master->WaitForCatalogManager());
num_masters++;
ASSERT_EVENTUALLY([&] {
- ASSERT_OK(VerifyVotersOnAllMasters(num_masters, cluster_.get()));
+ ASSERT_OK(cluster_->VerifyVotersOnAllMasters(num_masters));
});
{
std::lock_guard l(master_addrs_lock);
@@ -1794,7 +1763,7 @@ TEST_F(AutoAddMasterTest, TestAddNewMaster) {
ASSERT_OK(peer->WaitForCatalogManager());
auto expected_num_masters = ++idx;
ASSERT_EVENTUALLY([&] {
- ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get()));
+ ASSERT_OK(cluster_->VerifyVotersOnAllMasters(expected_num_masters));
});
NO_FATALS(cluster_->AssertNoCrashes());
}
@@ -1821,7 +1790,7 @@ TEST_F(MinidumpTest, TestAddNewMasterMinidumpsEnabled) {
ASSERT_OK(peer->WaitForCatalogManager());
auto expected_num_masters = ++idx;
ASSERT_EVENTUALLY([&] {
- ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get()));
+ ASSERT_OK(cluster_->VerifyVotersOnAllMasters(expected_num_masters));
});
NO_FATALS(cluster_->AssertNoCrashes());
}
diff --git a/src/kudu/mini-cluster/external_mini_cluster.cc
b/src/kudu/mini-cluster/external_mini_cluster.cc
index a935e17c1..a8044e33c 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster.cc
@@ -35,17 +35,20 @@
#include "kudu/client/client.h"
#include "kudu/client/master_rpc.h"
+#include "kudu/consensus/metadata.pb.h"
#include "kudu/fs/default_key_provider.h"
#include "kudu/fs/fs.pb.h"
#include "kudu/fs/key_provider.h"
#include "kudu/postgres/mini_postgres.h"
#include "kudu/ranger-kms/mini_ranger_kms.h"
#include "kudu/rpc/rpc_header.pb.h"
+#include "kudu/tablet/metadata.pb.h"
#if !defined(NO_CHRONY)
#include "kudu/clock/test/mini_chronyd.h"
#endif
#include "kudu/common/wire_protocol.h"
#include "kudu/common/wire_protocol.pb.h"
+#include "kudu/consensus/consensus.proxy.h"
#include "kudu/gutil/basictypes.h"
#include "kudu/gutil/strings/escaping.h"
#include "kudu/gutil/strings/join.h"
@@ -63,12 +66,10 @@
#include "kudu/security/test/mini_kdc.h"
#include "kudu/server/server_base.pb.h"
#include "kudu/server/server_base.proxy.h"
-#include "kudu/tablet/metadata.pb.h"
#include "kudu/tablet/tablet.pb.h"
#include "kudu/tserver/tserver.pb.h"
#include "kudu/tserver/tserver_admin.proxy.h"
#include "kudu/tserver/tserver_service.proxy.h"
-#include "kudu/consensus/consensus.proxy.h"
#include "kudu/util/async_util.h"
#include "kudu/util/env.h"
#include "kudu/util/env_util.h"
@@ -94,8 +95,11 @@ using kudu::client::internal::ConnectToClusterRpc;
#if !defined(NO_CHRONY)
using kudu::clock::MiniChronyd;
#endif
+using kudu::master::ListMastersRequestPB;
+using kudu::master::ListMastersResponsePB;
using kudu::master::ListTablesRequestPB;
using kudu::master::ListTablesResponsePB;
+using kudu::consensus::RaftPeerPB;
using kudu::master::MasterServiceProxy;
using kudu::pb_util::SecureDebugString;
using kudu::pb_util::SecureShortDebugString;
@@ -996,6 +1000,27 @@ Status ExternalMiniCluster::WaitForTabletsRunning(
return Status::TimedOut(SecureDebugString(resp));
}
+Status ExternalMiniCluster::VerifyVotersOnAllMasters(int num_masters) {
+ for (int i = 0; i < this->num_masters(); i++) {
+ ListMastersResponsePB resp;
+ ListMastersRequestPB req;
+ RpcController rpc;
+ RETURN_NOT_OK(master_proxy(i)->ListMasters(req, &resp, &rpc));
+ if (num_masters != resp.masters_size()) {
+ return Status::IllegalState(Substitute("expected $0 masters but got $1",
+ num_masters,
resp.masters_size()));
+ }
+ for (const auto& master : resp.masters()) {
+ if ((master.role() != RaftPeerPB::LEADER && master.role() !=
RaftPeerPB::FOLLOWER) ||
+ master.member_type() != RaftPeerPB::VOTER ||
+ master.registration().rpc_addresses_size() != 1) {
+ return Status::IllegalState(Substitute("bad master: $0",
SecureShortDebugString(master)));
+ }
+ }
+ }
+ return Status::OK();
+}
+
ExternalMaster* ExternalMiniCluster::leader_master() {
int idx = 0;
Status s = GetLeaderMasterIndex(&idx);
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h
b/src/kudu/mini-cluster/external_mini_cluster.h
index b60dcc4d1..449f0b5c2 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -613,6 +613,18 @@ class ExternalMiniCluster : public MiniCluster {
size_t idx,
scoped_refptr<ExternalMaster>* master);
+ // Verify the cluster contains 'num_masters' on all masters, and that they
+ // are all voters. Returns an error if the expected state is not present.
+ //
+ // This should be used if it is required that all masters have accepted the
config
+ // change. E.g. tests that restart a cluster after adding a master should
verify
+ // that all masters agree before restarting, in case lagging masters start
up with
+ // stale configs.
+ //
+ // TODO(awong): we should be more robust to starting up with mismatched
on-disk
+ // configs, if we can help it.
+ Status VerifyVotersOnAllMasters(int num_masters);
+
private:
Status StartMasters();