This is an automated email from the ASF dual-hosted git repository.

zhangyifan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new e734ae021 [KUDU-3452] Make validate tablet creating task not affected
e734ae021 is described below

commit e734ae0216749c6aa7d85756eecf2b8be907de5a
Author: xinghuayu007 <[email protected]>
AuthorDate: Tue Mar 7 11:42:11 2023 +0800

    [KUDU-3452] Make validate tablet creating task not affected
    
    Currently, creating a table with RF=n when the number of
    healthy tservers is less than n will get stuck. Because
    catalog manager creates tablets for it will fail and retry
    continuously. At the same time, creating a table with RF=m
    also will get stuck even if there are more than m healthy
    tservers. Because catalog manager will return when finds a
    tablet-creating task failed and will not try to select replicas
    for other PREPARING tablets. For example, creating a three
    replicas table times out when one of three tablet servers becomes
    unavailable. After that, creating a two-replicas table also
    will timeout even if there are enough tablet servers to place
    its replicas. The validate two-replicas table-creating task will
    be affected by invalidate three-replicas table-creating task.
    
    This patch fixes this problem. If a task of creating tablet fail,
    it will not return immediately, but let other tasks of creating
    other tablets keep on running.
    
    Change-Id: I64668651d0e8f58b92cfb841bdb20617de6776f9
    Reviewed-on: http://gerrit.cloudera.org:8080/19594
    Reviewed-by: Yifan Zhang <[email protected]>
    Tested-by: Yifan Zhang <[email protected]>
---
 src/kudu/integration-tests/create-table-itest.cc | 91 ++++++++++++++++++++++++
 src/kudu/master/catalog_manager.cc               | 23 ++++--
 2 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/src/kudu/integration-tests/create-table-itest.cc 
b/src/kudu/integration-tests/create-table-itest.cc
index f74af64a6..decde5118 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -777,4 +777,95 @@ TEST_P(NotEnoughHealthyTServersTest, 
TestNotEnoughHealthyTServers) {
     });
   }
 }
+
+TEST_F(CreateTableITest, TestNotAffectedCreatingTables) {
+  constexpr int kTimeout = 4000;
+  constexpr int kNumTServers = 3;
+  constexpr int kHeartbeatIntervalMs = 2000;
+  constexpr const char* const kNotEnoughTServersTableId = 
"NotEnoughTServersTable";
+  constexpr const char* const kTwoReplicasTableId = "TwoReplicasTable";
+  constexpr const char* const kOneReplicasTableId = "OneReplicasTable";
+
+  vector<string> master_flags = { "--allow_unsafe_replication_factor=true",
+                                  
Substitute("--tserver_unresponsive_timeout_ms=$0", kTimeout),
+                                  Substitute("--heartbeat_interval_ms=$0", 
kHeartbeatIntervalMs),
+                                  
"--catalog_manager_check_ts_count_for_create_table=false" };
+  NO_FATALS(StartCluster({}, master_flags, kNumTServers));
+
+  string master_address = cluster_->master()->bound_rpc_addr().ToString();
+
+  // Shutdown one tablet server.
+  string stopped_ts_uuid = cluster_->tablet_server(0)->uuid();
+  cluster_->tablet_server(0)->Shutdown();
+
+  // Wait until the tablet server become unavailable for replicas placement.
+  SleepFor(MonoDelta::FromMilliseconds(kTimeout + kHeartbeatIntervalMs));
+
+  auto client_schema = KuduSchema::FromSchema(GetSimpleTestSchema());
+
+  auto create_table_func = [&](const string& table_name, int replica_num) -> 
Status {
+    unique_ptr<client::KuduTableCreator> table_creator(
+        client_->NewTableCreator());
+    return table_creator->table_name(table_name)
+                        .schema(&client_schema)
+                        .set_range_partition_columns({ "key" })
+                        .num_replicas(replica_num)
+                        .timeout(MonoDelta::FromMilliseconds(kTimeout))
+                        .Create();
+  };
+
+  // Create a table without enough healthy tablet servers.
+  // It will timeout.
+  {
+    Status s = create_table_func(kNotEnoughTServersTableId, 
cluster_->num_tablet_servers());
+    ASSERT_TRUE(s.IsTimedOut()) << s.ToString();
+    ASSERT_STR_CONTAINS(s.ToString(), "Timed out waiting for Table Creation");
+  }
+  // Create a two-replicas table should succeed.
+  ASSERT_OK(create_table_func(kTwoReplicasTableId, 2));
+
+  // Create an one-replica table should succeed.
+  ASSERT_OK(create_table_func(kOneReplicasTableId, 1));
+
+  {
+    string out;
+    string cmd = Substitute(
+      "cluster ksck $0 --sections=TABLE_SUMMARIES --ksck_format=json_compact", 
master_address);
+    kudu::tools::RunKuduTool(strings::Split(cmd, " ", strings::SkipEmpty()), 
&out);
+    rapidjson::Document doc;
+    doc.Parse<0>(out.c_str());
+    // Only contains 2 tables kTwoReplicasTableId and kOneReplicasTableId.
+    ASSERT_EQ(2, doc["table_summaries"].Size());
+    ASSERT_STR_CONTAINS(out, kTwoReplicasTableId);
+    ASSERT_STR_CONTAINS(out, kOneReplicasTableId);
+    const rapidjson::Value& items = doc["table_summaries"];
+    for (int i = 0; i < items.Size(); i++) {
+      if (items[i]["name"].GetString() == kTwoReplicasTableId ||
+          items[i]["name"].GetString() == kOneReplicasTableId) {
+        ASSERT_EQ(string("HEALTHY"), items[i]["health"].GetString());
+      }
+    }
+  }
+  {
+    // Restart the tablet server and check the health of the cluster.
+    ASSERT_OK(cluster_->tablet_server(0)->Restart());
+    ASSERT_EVENTUALLY([&] {
+      string out;
+      string cmd = Substitute(
+          "cluster ksck $0 --sections=TABLE_SUMMARIES 
--ksck_format=json_compact",
+          master_address);
+      // KSCK will be OK.
+      ASSERT_OK(kudu::tools::RunKuduTool(
+          strings::Split(cmd, " ", strings::SkipEmpty()), &out));
+      rapidjson::Document doc;
+      doc.Parse<0>(out.c_str());
+      // Contains 3 tables kTwoReplicasTableId,
+      // kOneReplicasTableId and kNotEnoughTServersTableId.
+      ASSERT_EQ(3, doc["table_summaries"].Size());
+      ASSERT_STR_CONTAINS(out, kTwoReplicasTableId);
+      ASSERT_STR_CONTAINS(out, kOneReplicasTableId);
+      ASSERT_STR_CONTAINS(out, kNotEnoughTServersTableId);
+    });
+  }
+}
 } // namespace kudu
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index f50c724ce..790d130bf 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -5953,18 +5953,30 @@ Status CatalogManager::ProcessPendingAssignments(
     return Status::OK();
   }
 
-  // For those tablets which need to be created in this round, assign replicas.
   {
     TSDescriptorVector ts_descs;
     master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs);
     PlacementPolicy policy(std::move(ts_descs), &rng_);
-    for (auto& tablet : deferred.needs_create_rpc) {
+    auto it = deferred.needs_create_rpc.begin();
+    while (it != deferred.needs_create_rpc.end()) {
       // NOTE: if we fail to select replicas on the first pass (due to
       // insufficient Tablet Servers being online), we will still try
       // again unless the tablet/table creation is cancelled.
-      RETURN_NOT_OK_PREPEND(SelectReplicasForTablet(policy, tablet.get()),
-                            Substitute("error selecting replicas for tablet 
$0",
-                                       tablet->id()));
+      Status s = SelectReplicasForTablet(policy, (*it).get());
+      if (s.ok()) {
+        it++;
+      } else {
+        // The state of CREATING means the catalog manager has selected 
replicas for the tablet
+        // and waits for CreateTablet RPC finished. If selecting replicas for 
the tablet fails,
+        // it needs to recover the state to PREPARING, or its state will 
become CREATING later
+        // and waits for a long time(defined by 
FLAGS_tablet_creation_timeout_ms) to create a
+        // new tablet to replace it.
+        (*it)->mutable_metadata()->mutable_dirty()->set_state(
+            SysTabletsEntryPB::PREPARING, "Sending initial creation of 
tablet");
+        LOG(ERROR) << Substitute("Error selecting replicas for tablet $0, 
reason: $1",
+                                 (*it)->id(), s.ToString());
+        it = deferred.needs_create_rpc.erase(it);
+      }
     }
   }
 
@@ -6005,6 +6017,7 @@ Status CatalogManager::ProcessPendingAssignments(
       SendDeleteTabletRequest(tablet, l, l.data().pb.state_msg());
     }
   }
+
   // Send the CreateTablet() requests to the servers. This is asynchronous / 
non-blocking.
   for (const auto& tablet : deferred.needs_create_rpc) {
     TabletMetadataLock l(tablet.get(), LockMode::READ);

Reply via email to