This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch branch-1.15.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 33c05f1959e14dc83106d9e28f7afc7659cbc9b7 Author: Alexey Serbin <[email protected]> AuthorDate: Tue May 25 00:28:57 2021 -0700 [test] minor clean up on TestUnsafeChangeConfigLeaderWithPendingConfig When looking at one of recent pre-commit test failure [1][2], I found that AdminCliTest.TestUnsafeChangeConfigLeaderWithPendingConfig failed but it was hard to tell what was the actual status code returned: src/kudu/tools/kudu-admin-test.cc:881: Failure Value of: s.IsTimedOut() Actual: false Expected: true I reran the scenario multiple times with TSAN-enabled binaries, but was not able to reproduce the test failure yet. Anyways, I added more diagnostics about the actual status code returned and cleaned up the code of the test scenario a bit. Hopefully, next time it fails it will be clearer what's going on. [1] http://jenkins.kudu.apache.org/job/kudu-gerrit/23918/BUILD_TYPE=TSAN [2] http://dist-test.cloudera.org/job?job_id=jenkins-slave.1621893605.3746155 Change-Id: Id2bf354764f3ddb371de19910c5f879c1a06c78b Reviewed-on: http://gerrit.cloudera.org:8080/17504 Tested-by: Kudu Jenkins Reviewed-by: Mahesh Reddy <[email protected]> Reviewed-by: Andrew Wong <[email protected]> Reviewed-by: Grant Henke <[email protected]> (cherry picked from commit a0bda0989d97fac182f409ecaef3357a1fb5329f) Reviewed-on: http://gerrit.cloudera.org:8080/17514 Reviewed-by: Alexey Serbin <[email protected]> --- src/kudu/tools/kudu-admin-test.cc | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc index daafb99..81f489c 100644 --- a/src/kudu/tools/kudu-admin-test.cc +++ b/src/kudu/tools/kudu-admin-test.cc @@ -849,14 +849,12 @@ TEST_F(AdminCliTest, TestUnsafeChangeConfigLeaderWithPendingConfig) { } // Get a baseline config reported to the master. - LOG(INFO) << "Waiting for Master to see the current replicas..."; master::GetTabletLocationsResponsePB tablet_locations; bool has_leader; ASSERT_OK(WaitForReplicasReportedToMaster(cluster_->master_proxy(), 3, tablet_id_, kTimeout, WAIT_FOR_LEADER, VOTER_REPLICA, &has_leader, &tablet_locations)); - LOG(INFO) << "Tablet locations:\n" << SecureDebugString(tablet_locations); ASSERT_TRUE(has_leader) << SecureDebugString(tablet_locations); TServerDetails* leader_ts; @@ -865,7 +863,7 @@ TEST_F(AdminCliTest, TestUnsafeChangeConfigLeaderWithPendingConfig) { ASSERT_EVENTUALLY([&] { ASSERT_OK(FindTabletFollowers(active_tablet_servers, tablet_id_, kTimeout, &followers)); }); - ASSERT_EQ(followers.size(), 2); + ASSERT_EQ(2, followers.size()); // Wait for initial NO_OP to be committed by the leader. ASSERT_OK(WaitForOpFromCurrentTerm(leader_ts, tablet_id_, COMMITTED_OPID, kTimeout)); @@ -874,14 +872,12 @@ TEST_F(AdminCliTest, TestUnsafeChangeConfigLeaderWithPendingConfig) { cluster_->tablet_server_by_uuid(followers[0]->uuid())->Shutdown(); cluster_->tablet_server_by_uuid(followers[1]->uuid())->Shutdown(); - // Now try to replicate a ChangeConfig operation. This should get stuck and time out - // because the server can't replicate any operations. - Status s = RemoveServer(leader_ts, tablet_id_, followers[1], - MonoDelta::FromSeconds(2), -1); - ASSERT_TRUE(s.IsTimedOut()); + // Now try to replicate a ChangeConfig operation. This should get stuck and + // time out because the server can't replicate any operations. + const auto s = RemoveServer( + leader_ts, tablet_id_, followers[1], MonoDelta::FromSeconds(2), -1); + ASSERT_TRUE(s.IsTimedOut()) << s.ToString(); - LOG(INFO) << "Change Config Op timed out, Sending a Replace config " - << "command when change config op is pending on the leader."; const string& leader_addr = Substitute("$0:$1", leader_ts->registration.rpc_addresses(0).host(), leader_ts->registration.rpc_addresses(0).port()); @@ -894,19 +890,17 @@ TEST_F(AdminCliTest, TestUnsafeChangeConfigLeaderWithPendingConfig) { ASSERT_OK(cluster_->master()->Restart()); const TServerDetails* new_node = FindNodeForReReplication(active_tablet_servers); - ASSERT_TRUE(new_node != nullptr); + ASSERT_NE(nullptr, new_node); // Master may try to add the servers which are down until tserver_unresponsive_timeout_ms, // so it is safer to wait until consensus metadata has 3 voters on new_node. ASSERT_OK(WaitUntilCommittedConfigNumVotersIs(3, new_node, tablet_id_, kTimeout)); // Wait for the master to be notified of the config change. - LOG(INFO) << "Waiting for Master to see new config..."; ASSERT_OK(WaitForReplicasReportedToMaster(cluster_->master_proxy(), 3, tablet_id_, kTimeout, WAIT_FOR_LEADER, VOTER_REPLICA, &has_leader, &tablet_locations)); - LOG(INFO) << "Tablet locations:\n" << SecureDebugString(tablet_locations); for (const auto& replica : tablet_locations.tablet_locations(0).interned_replicas()) { const auto& uuid = tablet_locations.ts_infos(replica.ts_info_idx()).permanent_uuid(); ASSERT_NE(uuid, followers[0]->uuid());
