This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.18.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit e73def54a1955c2fa33664fcfd8c4c6614e3e178 Author: Ádám Bakai <[email protected]> AuthorDate: Mon Jul 15 12:25:04 2024 +0200 KUDU-3589 Wait until masters become voters In SecurityITest.TestNonDefaultPrincipalMultipleMaster test, when the leader was shut down, it was possible that some of the Raft members are still in learner mode. This makes it impossible for the cluster to recover and elect a new leader. To solve this test issue, there is an explicit wait until masters become voters in the Raft consensus. This was done by using the already existing VerifyVotersOnAllMasters function. The function was moved from dynamic_multi_master-test to external_mini_cluster to be usable from security-itest, too. Change-Id: I1ea46345680ce506206e9335a147b887fc173b50 Reviewed-on: http://gerrit.cloudera.org:8080/21636 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Alexey Serbin <[email protected]> (cherry picked from commit c5993dbf4f8e706784c690f3d5b38cad75276387) Reviewed-on: http://gerrit.cloudera.org:8080/21996 Reviewed-by: Abhishek Chennaka <[email protected]> --- src/kudu/integration-tests/security-itest.cc | 4 +++ src/kudu/master/dynamic_multi_master-test.cc | 37 +++----------------------- src/kudu/mini-cluster/external_mini_cluster.cc | 29 ++++++++++++++++++-- src/kudu/mini-cluster/external_mini_cluster.h | 12 +++++++++ 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/kudu/integration-tests/security-itest.cc b/src/kudu/integration-tests/security-itest.cc index 78452461b..f0509f920 100644 --- a/src/kudu/integration-tests/security-itest.cc +++ b/src/kudu/integration-tests/security-itest.cc @@ -906,6 +906,10 @@ TEST_F(SecurityITest, TestNonDefaultPrincipalMultipleMaster) { ASSERT_OK(cluster_->AddMaster()); ASSERT_OK(cluster_->master(2)->WaitForCatalogManager()); + ASSERT_EVENTUALLY([&] { + ASSERT_OK(cluster_->VerifyVotersOnAllMasters(3)); + }); + // Tablet servers need to be restarted in order to update their master list. for (int i = 0; i < cluster_->num_tablet_servers(); i++) { cluster_->tablet_server(i)->Shutdown(); diff --git a/src/kudu/master/dynamic_multi_master-test.cc b/src/kudu/master/dynamic_multi_master-test.cc index ed72c3028..1479ef736 100644 --- a/src/kudu/master/dynamic_multi_master-test.cc +++ b/src/kudu/master/dynamic_multi_master-test.cc @@ -203,37 +203,6 @@ Status RunListMasters(ListMastersResponsePB* resp, ExternalMiniCluster* cluster) return RunLeaderMasterRPC(list_masters, cluster); } -// Verify the 'cluster' contains 'num_masters' on all masters, and that they -// are all voters. Returns an error if the expected state is not present. -// -// This should be used instead of VerifyVoterMastersForCluster() if it is -// required that all masters have accepted the config change. E.g. tests that -// restart a cluster after adding a master should verify that all masters agree -// before restarting, in case lagging masters start up with stale configs. -// -// TODO(awong): we should be more robust to starting up with mismatched on-disk -// configs, if we can help it. -Status VerifyVotersOnAllMasters(int num_masters, ExternalMiniCluster* cluster) { - for (int i = 0; i < cluster->num_masters(); i++) { - ListMastersResponsePB resp; - ListMastersRequestPB req; - RpcController rpc; - RETURN_NOT_OK(cluster->master_proxy(i)->ListMasters(req, &resp, &rpc)); - if (num_masters != resp.masters_size()) { - return Status::IllegalState(Substitute("expected $0 masters but got $1", - num_masters, resp.masters_size())); - } - for (const auto& master : resp.masters()) { - if ((master.role() != RaftPeerPB::LEADER && master.role() != RaftPeerPB::FOLLOWER) || - master.member_type() != RaftPeerPB::VOTER || - master.registration().rpc_addresses_size() != 1) { - return Status::IllegalState(Substitute("bad master: $0", SecureShortDebugString(master))); - } - } - } - return Status::OK(); -} - // Verify the ExternalMiniCluster 'cluster' contains 'num_masters' overall and // are all VOTERS. Populates the new master addresses in 'master_hps', if not // nullptr. Returns an error if the expected state is not present. @@ -1699,7 +1668,7 @@ TEST_F(AutoAddMasterTest, TestAddWithOnGoingDdl) { ASSERT_OK(new_master->WaitForCatalogManager()); num_masters++; ASSERT_EVENTUALLY([&] { - ASSERT_OK(VerifyVotersOnAllMasters(num_masters, cluster_.get())); + ASSERT_OK(cluster_->VerifyVotersOnAllMasters(num_masters)); }); { std::lock_guard l(master_addrs_lock); @@ -1784,7 +1753,7 @@ TEST_F(AutoAddMasterTest, TestAddNewMaster) { ASSERT_OK(peer->WaitForCatalogManager()); auto expected_num_masters = ++idx; ASSERT_EVENTUALLY([&] { - ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get())); + ASSERT_OK(cluster_->VerifyVotersOnAllMasters(expected_num_masters)); }); NO_FATALS(cluster_->AssertNoCrashes()); } @@ -1811,7 +1780,7 @@ TEST_F(MinidumpTest, TestAddNewMasterMinidumpsEnabled) { ASSERT_OK(peer->WaitForCatalogManager()); auto expected_num_masters = ++idx; ASSERT_EVENTUALLY([&] { - ASSERT_OK(VerifyVotersOnAllMasters(expected_num_masters, cluster_.get())); + ASSERT_OK(cluster_->VerifyVotersOnAllMasters(expected_num_masters)); }); NO_FATALS(cluster_->AssertNoCrashes()); } diff --git a/src/kudu/mini-cluster/external_mini_cluster.cc b/src/kudu/mini-cluster/external_mini_cluster.cc index a935e17c1..a8044e33c 100644 --- a/src/kudu/mini-cluster/external_mini_cluster.cc +++ b/src/kudu/mini-cluster/external_mini_cluster.cc @@ -35,17 +35,20 @@ #include "kudu/client/client.h" #include "kudu/client/master_rpc.h" +#include "kudu/consensus/metadata.pb.h" #include "kudu/fs/default_key_provider.h" #include "kudu/fs/fs.pb.h" #include "kudu/fs/key_provider.h" #include "kudu/postgres/mini_postgres.h" #include "kudu/ranger-kms/mini_ranger_kms.h" #include "kudu/rpc/rpc_header.pb.h" +#include "kudu/tablet/metadata.pb.h" #if !defined(NO_CHRONY) #include "kudu/clock/test/mini_chronyd.h" #endif #include "kudu/common/wire_protocol.h" #include "kudu/common/wire_protocol.pb.h" +#include "kudu/consensus/consensus.proxy.h" #include "kudu/gutil/basictypes.h" #include "kudu/gutil/strings/escaping.h" #include "kudu/gutil/strings/join.h" @@ -63,12 +66,10 @@ #include "kudu/security/test/mini_kdc.h" #include "kudu/server/server_base.pb.h" #include "kudu/server/server_base.proxy.h" -#include "kudu/tablet/metadata.pb.h" #include "kudu/tablet/tablet.pb.h" #include "kudu/tserver/tserver.pb.h" #include "kudu/tserver/tserver_admin.proxy.h" #include "kudu/tserver/tserver_service.proxy.h" -#include "kudu/consensus/consensus.proxy.h" #include "kudu/util/async_util.h" #include "kudu/util/env.h" #include "kudu/util/env_util.h" @@ -94,8 +95,11 @@ using kudu::client::internal::ConnectToClusterRpc; #if !defined(NO_CHRONY) using kudu::clock::MiniChronyd; #endif +using kudu::master::ListMastersRequestPB; +using kudu::master::ListMastersResponsePB; using kudu::master::ListTablesRequestPB; using kudu::master::ListTablesResponsePB; +using kudu::consensus::RaftPeerPB; using kudu::master::MasterServiceProxy; using kudu::pb_util::SecureDebugString; using kudu::pb_util::SecureShortDebugString; @@ -996,6 +1000,27 @@ Status ExternalMiniCluster::WaitForTabletsRunning( return Status::TimedOut(SecureDebugString(resp)); } +Status ExternalMiniCluster::VerifyVotersOnAllMasters(int num_masters) { + for (int i = 0; i < this->num_masters(); i++) { + ListMastersResponsePB resp; + ListMastersRequestPB req; + RpcController rpc; + RETURN_NOT_OK(master_proxy(i)->ListMasters(req, &resp, &rpc)); + if (num_masters != resp.masters_size()) { + return Status::IllegalState(Substitute("expected $0 masters but got $1", + num_masters, resp.masters_size())); + } + for (const auto& master : resp.masters()) { + if ((master.role() != RaftPeerPB::LEADER && master.role() != RaftPeerPB::FOLLOWER) || + master.member_type() != RaftPeerPB::VOTER || + master.registration().rpc_addresses_size() != 1) { + return Status::IllegalState(Substitute("bad master: $0", SecureShortDebugString(master))); + } + } + } + return Status::OK(); +} + ExternalMaster* ExternalMiniCluster::leader_master() { int idx = 0; Status s = GetLeaderMasterIndex(&idx); diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h index b60dcc4d1..449f0b5c2 100644 --- a/src/kudu/mini-cluster/external_mini_cluster.h +++ b/src/kudu/mini-cluster/external_mini_cluster.h @@ -613,6 +613,18 @@ class ExternalMiniCluster : public MiniCluster { size_t idx, scoped_refptr<ExternalMaster>* master); + // Verify the cluster contains 'num_masters' on all masters, and that they + // are all voters. Returns an error if the expected state is not present. + // + // This should be used if it is required that all masters have accepted the config + // change. E.g. tests that restart a cluster after adding a master should verify + // that all masters agree before restarting, in case lagging masters start up with + // stale configs. + // + // TODO(awong): we should be more robust to starting up with mismatched on-disk + // configs, if we can help it. + Status VerifyVotersOnAllMasters(int num_masters); + private: Status StartMasters();
