This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 2bd3d8281fc6260c441657d378fd4d023d526b6b Author: Alexey Serbin <[email protected]> AuthorDate: Tue Jun 23 11:43:24 2020 -0700 [autn_token_expire-itest] make one scenario more robust This patch addresses a TOCTOU condition in ClientReacquiresAuthnToken scenario of the MultiMasterIdleConnectionsITest test and replaces ad-hoc master re-election requests with graceful leadership transfer. I haven't seen the TOCTOU condition manifesting itself yet, but since I already spent time on looking around to find the reason behind the test failure (the root cause has been found and is fixed by https://gerrit.cloudera.org/#/c/16106/), it doesn't hurt to make the test scenario a bit more robust. Change-Id: Id06be851c92958c2c134d9e06a7dc133283aeba2 Reviewed-on: http://gerrit.cloudera.org:8080/16107 Reviewed-by: Grant Henke <[email protected]> Tested-by: Alexey Serbin <[email protected]> --- .../integration-tests/auth_token_expire-itest.cc | 37 ++++++++++++++-------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/kudu/integration-tests/auth_token_expire-itest.cc b/src/kudu/integration-tests/auth_token_expire-itest.cc index 6c0806c..a77fcfd 100644 --- a/src/kudu/integration-tests/auth_token_expire-itest.cc +++ b/src/kudu/integration-tests/auth_token_expire-itest.cc @@ -522,13 +522,12 @@ class MultiMasterIdleConnectionsITest : public AuthTokenExpireITestBase { // when the client tried to open the test table after master leader re-election: // Timed out: GetTableSchema timed out after deadline expired TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) { - const string kTableName = "keep-connection-to-former-master-leader"; - if (!AllowSlowTests()) { LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run"; return; } + const string kTableName = "keep-connection-to-former-master-leader"; const auto time_start = MonoTime::Now(); shared_ptr<KuduClient> client; @@ -560,6 +559,9 @@ TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) { SleepFor(MonoDelta::FromMilliseconds(250)); } + int former_leader_master_idx; + ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx)); + // Given the relation between the master_rpc_keepalive_time_ms_ and // authn_token_validity_seconds_ parameters, the original authn token should // expire and connections to follower masters should be torn down due to @@ -567,26 +569,33 @@ TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) { // after waiting for additional token expiration interval. SleepFor(MonoDelta::FromSeconds(authn_token_validity_seconds_)); - { - int former_leader_master_idx; - ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx)); - const int leader_idx = (former_leader_master_idx + 1) % num_masters_; - ASSERT_EVENTUALLY([&] { + ASSERT_EVENTUALLY([&] { + // The leadership could change behind the scenes, so if a new leader master + // is already around, another leadership change isn't needed. + int leader_idx; + ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx)); + if (former_leader_master_idx == leader_idx) { + // Make a request to the current leader master to step down, transferring + // the leadership role to other master. consensus::ConsensusServiceProxy proxy( cluster_->messenger(), cluster_->master(leader_idx)->bound_rpc_addr(), cluster_->master(leader_idx)->bound_rpc_hostport().host()); - consensus::RunLeaderElectionRequestPB req; - req.set_tablet_id(master::SysCatalogTable::kSysCatalogTabletId); + consensus::LeaderStepDownRequestPB req; req.set_dest_uuid(cluster_->master(leader_idx)->uuid()); + req.set_tablet_id(master::SysCatalogTable::kSysCatalogTabletId); + req.set_mode(consensus::LeaderStepDownMode::GRACEFUL); + consensus::LeaderStepDownResponsePB resp; rpc::RpcController rpc; - rpc.set_timeout(MonoDelta::FromSeconds(1)); - consensus::RunLeaderElectionResponsePB resp; - ASSERT_OK(proxy.RunLeaderElection(req, &resp, &rpc)); + rpc.set_timeout(MonoDelta::FromSeconds(3)); + ASSERT_OK(proxy.LeaderStepDown(req, &resp, &rpc)); + + // Make sure the leader has actually changed. If not, the step-down + // request is retried until ASSERT_EVENTUALLY() times out. int idx; ASSERT_OK(cluster_->GetLeaderMasterIndex(&idx)); ASSERT_NE(former_leader_master_idx, idx); - }); - } + } + }); // Try to open the table after leader master re-election. The former leader // responds with NOT_THE_LEADER error even if the authn token has expired
