This is an automated email from the ASF dual-hosted git repository.
zhangyifan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new e734ae021 [KUDU-3452] Make valid tablet creating task not affected
e734ae021 is described below
commit e734ae0216749c6aa7d85756eecf2b8be907de5a
Author: xinghuayu007 <[email protected]>
AuthorDate: Tue Mar 7 11:42:11 2023 +0800
[KUDU-3452] Make valid tablet creating task not affected
Currently, creating a table with RF=n when the number of
healthy tservers is less than n will get stuck, because the
catalog manager's attempts to create tablets for it fail and
are retried continuously. At the same time, creating a table
with RF=m will also get stuck even if there are more than m
healthy tservers, because the catalog manager returns as soon
as it finds a failed tablet-creating task and does not try to
select replicas for the other PREPARING tablets. For example,
creating a three-replica table times out when one of three
tablet servers becomes unavailable. After that, creating a
two-replica table also times out even if there are enough
tablet servers to place its replicas. The valid two-replica
table-creating task is affected by the invalid three-replica
table-creating task.
This patch fixes this problem. If a task of creating a tablet
fails, the catalog manager does not return immediately, but
lets the tasks of creating the other tablets keep on running.
Change-Id: I64668651d0e8f58b92cfb841bdb20617de6776f9
Reviewed-on: http://gerrit.cloudera.org:8080/19594
Reviewed-by: Yifan Zhang <[email protected]>
Tested-by: Yifan Zhang <[email protected]>
---
src/kudu/integration-tests/create-table-itest.cc | 91 ++++++++++++++++++++++++
src/kudu/master/catalog_manager.cc | 23 ++++--
2 files changed, 109 insertions(+), 5 deletions(-)
diff --git a/src/kudu/integration-tests/create-table-itest.cc
b/src/kudu/integration-tests/create-table-itest.cc
index f74af64a6..decde5118 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -777,4 +777,95 @@ TEST_P(NotEnoughHealthyTServersTest,
TestNotEnoughHealthyTServers) {
});
}
}
+
+TEST_F(CreateTableITest, TestNotAffectedCreatingTables) {
+ constexpr int kTimeout = 4000;
+ constexpr int kNumTServers = 3;
+ constexpr int kHeartbeatIntervalMs = 2000;
+ constexpr const char* const kNotEnoughTServersTableId =
"NotEnoughTServersTable";
+ constexpr const char* const kTwoReplicasTableId = "TwoReplicasTable";
+ constexpr const char* const kOneReplicasTableId = "OneReplicasTable";
+
+ vector<string> master_flags = { "--allow_unsafe_replication_factor=true",
+
Substitute("--tserver_unresponsive_timeout_ms=$0", kTimeout),
+ Substitute("--heartbeat_interval_ms=$0",
kHeartbeatIntervalMs),
+
"--catalog_manager_check_ts_count_for_create_table=false" };
+ NO_FATALS(StartCluster({}, master_flags, kNumTServers));
+
+ string master_address = cluster_->master()->bound_rpc_addr().ToString();
+
+ // Shutdown one tablet server.
+ string stopped_ts_uuid = cluster_->tablet_server(0)->uuid();
+ cluster_->tablet_server(0)->Shutdown();
+
+ // Wait until the tablet server become unavailable for replicas placement.
+ SleepFor(MonoDelta::FromMilliseconds(kTimeout + kHeartbeatIntervalMs));
+
+ auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema());
+
+ auto create_table_func = [&](const string& table_name, int replica_num) ->
Status {
+ unique_ptr<client::KuduTableCreator> table_creator(
+ client_->NewTableCreator());
+ return table_creator->table_name(table_name)
+ .schema(&client_schema)
+ .set_range_partition_columns({ "key" })
+ .num_replicas(replica_num)
+ .timeout(MonoDelta::FromMilliseconds(kTimeout))
+ .Create();
+ };
+
+ // Create a table without enough healthy tablet servers.
+ // It will timeout.
+ {
+ Status s = create_table_func(kNotEnoughTServersTableId,
cluster_->num_tablet_servers());
+ ASSERT_TRUE(s.IsTimedOut()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "Timed out waiting for Table Creation");
+ }
+ // Create a two-replicas table should succeed.
+ ASSERT_OK(create_table_func(kTwoReplicasTableId, 2));
+
+ // Create an one-replica table should succeed.
+ ASSERT_OK(create_table_func(kOneReplicasTableId, 1));
+
+ {
+ string out;
+ string cmd = Substitute(
+ "cluster ksck $0 --sections=TABLE_SUMMARIES --ksck_format=json_compact",
master_address);
+ kudu::tools::RunKuduTool(strings::Split(cmd, " ", strings::SkipEmpty()),
&out);
+ rapidjson::Document doc;
+ doc.Parse<0>(out.c_str());
+ // Only contains 2 tables kTwoReplicasTableId and kOneReplicasTableId.
+ ASSERT_EQ(2, doc["table_summaries"].Size());
+ ASSERT_STR_CONTAINS(out, kTwoReplicasTableId);
+ ASSERT_STR_CONTAINS(out, kOneReplicasTableId);
+ const rapidjson::Value& items = doc["table_summaries"];
+ for (int i = 0; i < items.Size(); i++) {
+ if (items[i]["name"].GetString() == kTwoReplicasTableId ||
+ items[i]["name"].GetString() == kOneReplicasTableId) {
+ ASSERT_EQ(string("HEALTHY"), items[i]["health"].GetString());
+ }
+ }
+ }
+ {
+ // Restart the tablet server and check the health of the cluster.
+ ASSERT_OK(cluster_->tablet_server(0)->Restart());
+ ASSERT_EVENTUALLY([&] {
+ string out;
+ string cmd = Substitute(
+ "cluster ksck $0 --sections=TABLE_SUMMARIES
--ksck_format=json_compact",
+ master_address);
+ // KSCK will be OK.
+ ASSERT_OK(kudu::tools::RunKuduTool(
+ strings::Split(cmd, " ", strings::SkipEmpty()), &out));
+ rapidjson::Document doc;
+ doc.Parse<0>(out.c_str());
+ // Contains 3 tables kTwoReplicasTableId,
+ // kOneReplicasTableId and kNotEnoughTServersTableId.
+ ASSERT_EQ(3, doc["table_summaries"].Size());
+ ASSERT_STR_CONTAINS(out, kTwoReplicasTableId);
+ ASSERT_STR_CONTAINS(out, kOneReplicasTableId);
+ ASSERT_STR_CONTAINS(out, kNotEnoughTServersTableId);
+ });
+ }
+}
} // namespace kudu
diff --git a/src/kudu/master/catalog_manager.cc
b/src/kudu/master/catalog_manager.cc
index f50c724ce..790d130bf 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -5953,18 +5953,30 @@ Status CatalogManager::ProcessPendingAssignments(
return Status::OK();
}
- // For those tablets which need to be created in this round, assign replicas.
{
TSDescriptorVector ts_descs;
master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs);
PlacementPolicy policy(std::move(ts_descs), &rng_);
- for (auto& tablet : deferred.needs_create_rpc) {
+ auto it = deferred.needs_create_rpc.begin();
+ while (it != deferred.needs_create_rpc.end()) {
// NOTE: if we fail to select replicas on the first pass (due to
// insufficient Tablet Servers being online), we will still try
// again unless the tablet/table creation is cancelled.
- RETURN_NOT_OK_PREPEND(SelectReplicasForTablet(policy, tablet.get()),
- Substitute("error selecting replicas for tablet
$0",
- tablet->id()));
+ Status s = SelectReplicasForTablet(policy, (*it).get());
+ if (s.ok()) {
+ it++;
+ } else {
+ // The state of CREATING means the catalog manager has selected
replicas for the tablet
+ // and waits for CreateTablet RPC finished. If selecting replicas for
the tablet fails,
+ // it needs to recover the state to PREPARING, or its state will
become CREATING later
+ // and waits for a long time(defined by
FLAGS_tablet_creation_timeout_ms) to create a
+ // new tablet to replace it.
+ (*it)->mutable_metadata()->mutable_dirty()->set_state(
+ SysTabletsEntryPB::PREPARING, "Sending initial creation of
tablet");
+ LOG(ERROR) << Substitute("Error selecting replicas for tablet $0,
reason: $1",
+ (*it)->id(), s.ToString());
+ it = deferred.needs_create_rpc.erase(it);
+ }
}
}
@@ -6005,6 +6017,7 @@ Status CatalogManager::ProcessPendingAssignments(
SendDeleteTabletRequest(tablet, l, l.data().pb.state_msg());
}
}
+
// Send the CreateTablet() requests to the servers. This is asynchronous /
non-blocking.
for (const auto& tablet : deferred.needs_create_rpc) {
TabletMetadataLock l(tablet.get(), LockMode::READ);