Repository: kudu Updated Branches: refs/heads/master 3e659dcd9 -> 07c6efb58
[client-test] ServiceUnavailable retry scenario KUDU-1871 Kudu C++ client does not retry on ServiceUnavailable errors Added test to verify behavior of the Kudu C++ client in case of ServiceUnavailable error returned from master server. The test is currently disabled because it fails. The follow-up commit will add the fix and enable the test. Change-Id: Ief82add7b36c1f3875ea77e7f7503f6fb552838c Reviewed-on: http://gerrit.cloudera.org:8080/5974 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/0898a5bb Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/0898a5bb Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/0898a5bb Branch: refs/heads/master Commit: 0898a5bbca96066410c5c11e2aff9ec6d334d54b Parents: 3e659dc Author: Alexey Serbin <[email protected]> Authored: Sat Feb 11 16:08:18 2017 -0800 Committer: Alexey Serbin <[email protected]> Committed: Tue Feb 14 23:51:31 2017 +0000 ---------------------------------------------------------------------- src/kudu/client/client-test.cc | 78 ++++++++++++++++++++++++++++++++++ src/kudu/master/catalog_manager.h | 9 ++++ 2 files changed, 87 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/0898a5bb/src/kudu/client/client-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc index b7bffc7..8442da6 100644 --- a/src/kudu/client/client-test.cc +++ b/src/kudu/client/client-test.cc @@ -4659,5 +4659,83 @@ TEST_F(ClientTest, TestGetSecurityInfoFromMaster) { ASSERT_EQ(1, client_->data_->messenger_->tls_context().trusted_cert_count_for_tests()); } +struct ServiceUnavailableRetryParams { + MonoDelta usurper_sleep; + MonoDelta client_timeout; + std::function<bool(const Status&)> status_check; +}; + +class ServiceUnavailableRetryClientTest : + public ClientTest, + public ::testing::WithParamInterface<ServiceUnavailableRetryParams> { +}; + +// Test the client retries on ServiceUnavailable errors. This scenario uses +// 'CreateTable' RPC to verify the retry behavior. +TEST_P(ServiceUnavailableRetryClientTest, DISABLED_CreateTable) { + // This scenario is designed to run on a single-master cluster. + ASSERT_EQ(1, cluster_->num_masters()); + + const ServiceUnavailableRetryParams& param(GetParam()); + + CountDownLatch start_synchronizer(1); + // Usurp catalog manager's leader lock for some time to block + // the catalog manager from performing its duties when becoming a leader. + // Keeping the catalog manager off its duties results in ServiceUnavailable + // error response for 'CreateTable' RPC. + thread usurper([&]() { + std::lock_guard<RWMutex> leader_lock_guard( + cluster_->mini_master()->master()->catalog_manager()->leader_lock_); + start_synchronizer.CountDown(); + SleepFor(param.usurper_sleep); + }); + + // Wait for the lock to be acquired by the usurper thread to make sure + // the following CreateTable call will get ServiceUnavailable response + // while the lock is held. + start_synchronizer.Wait(); + + // Start issuing 'CreateTable' RPC when the catalog manager is still + // unavailable to serve the incoming requests. + unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator()); + const Status s = table_creator->table_name("service-unavailable-retry") + .schema(&schema_) + .num_replicas(1) + .set_range_partition_columns({ "key" }) + .timeout(param.client_timeout) + .Create(); + EXPECT_TRUE(param.status_check(s)) << s.ToString(); + + // Wait for the usurper thread to release the lock, otherwise there + // would be a problem with destroying a locked mutex upon catalog manager + // shutdown. + usurper.join(); +} + +static const ServiceUnavailableRetryParams service_unavailable_retry_cases[] = { + // Under the hood, there should be multiple retries. However, they should + // fail as timed out because the catalog manager responds with + // ServiceUnavailable error to all calls: the leader lock is still held + // by the usurping thread. + { + MonoDelta::FromSeconds(2), // usurper_sleep + MonoDelta::FromMilliseconds(500), // client_timeout + &Status::IsTimedOut, // status_check + }, + + // After some time the usurper thread should release the lock and the catalog + // manager should succeed. I.e., the client should succeed if retrying the + // request for long enough time. + { + MonoDelta::FromSeconds(1), // usurper_sleep + MonoDelta::FromSeconds(10), // client_timeout + &Status::ok, // status_check + }, +}; + +INSTANTIATE_TEST_CASE_P( + , ServiceUnavailableRetryClientTest, + ::testing::ValuesIn(service_unavailable_retry_cases)); + } // namespace client } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/0898a5bb/src/kudu/master/catalog_manager.h ---------------------------------------------------------------------- diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h index 336cca3..d7c3800 100644 --- a/src/kudu/master/catalog_manager.h +++ b/src/kudu/master/catalog_manager.h @@ -47,6 +47,12 @@ namespace kudu { class Schema; class ThreadPool; + +// Working around FRIEND_TEST() ugliness. +namespace client { +class ServiceUnavailableRetryClientTest_DISABLED_CreateTable_Test; +} // namespace client + class CreateTableStressTest_TestConcurrentCreateTableAndReloadMetadata_Test; namespace rpc { @@ -518,6 +524,9 @@ class CatalogManager : public tserver::TabletPeerLookupIf { // This test calls VisitTablesAndTablets() directly. FRIEND_TEST(kudu::CreateTableStressTest, TestConcurrentCreateTableAndReloadMetadata); + // This test exclusively acquires the leader_lock_ directly. + FRIEND_TEST(kudu::client::ServiceUnavailableRetryClientTest, DISABLED_CreateTable); + friend class TableLoader; friend class TabletLoader;
