This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b7ef3d25db63c41a7cb08369ecec1bbfc82d553f Author: Alexey Serbin <[email protected]> AuthorDate: Tue May 12 15:41:35 2020 -0700 [tools] multiple tablet ids in 'local_replica delete' This patch updates the 'local_replica delete' tool to allow for multiple tablet identifiers to be specified and processed at once. The rationale behind this change is that opening tablet server's metadata takes significant time, and it's possible to reduce the overall latency if removing multiple tablet replicas at once after the metadata has already been opened. A new test to verify the new functionality has been added as well. Change-Id: If0e509d1775be2a728e4e3b10c724c1f15a96ec1 Reviewed-on: http://gerrit.cloudera.org:8080/15909 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/tools/kudu-tool-test.cc | 45 ++++++++++++++++++++- src/kudu/tools/tool_action_common.cc | 3 ++ src/kudu/tools/tool_action_common.h | 2 + src/kudu/tools/tool_action_local_replica.cc | 61 +++++++++++++++++++++-------- 4 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 000ab72..5e3e2c5 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -1020,7 +1020,7 @@ TEST_F(ToolTest, TestModeHelp) { "data_size.*Summarize the data size", "dump.*Dump a Kudu filesystem", "copy_from_remote.*Copy a tablet replica", - "delete.*Delete a tablet replica from the local filesystem", + "delete.*Delete tablet replicas from the local filesystem", "list.*Show list of tablet replicas" }; NO_FATALS(RunTestHelp("local_replica", kLocalReplicaModeRegexes)); @@ -2765,6 +2765,49 @@ TEST_F(ToolTest, TestLocalReplicaDelete) { ASSERT_EQ(0, tablet_replicas.size()); } +// Test 'kudu local_replica delete' tool with multiple tablet replicas to +// operate at once. +TEST_F(ToolTest, TestLocalReplicaDeleteMultiple) { + static constexpr int kNumTablets = 10; + NO_FATALS(StartMiniCluster()); + + // TestWorkLoad.Setup() creates a table. + TestWorkload workload(mini_cluster_.get()); + workload.set_num_replicas(1); + workload.set_num_tablets(kNumTablets); + workload.Setup(); + + vector<string> tablet_ids; + MiniTabletServer* ts = mini_cluster_->mini_tablet_server(0); + { + // It's important to clear the 'tablet_replicas' container before shutting + // down the minicluster process. Otherwise CHECK() for the ThreadPool's + // tokens would trigger. + vector<scoped_refptr<TabletReplica>> tablet_replicas; + ts->server()->tablet_manager()->GetTabletReplicas(&tablet_replicas); + ASSERT_EQ(kNumTablets, tablet_replicas.size()); + for (const auto& replica : tablet_replicas) { + tablet_ids.emplace_back(replica->tablet_id()); + } + } + // Tablet server should be shutdown to release the lock and allow operating + // on its data. + ts->Shutdown(); + const string& tserver_dir = ts->options()->fs_opts.wal_root; + const string tablet_ids_csv_str = JoinStrings(tablet_ids, ","); + NO_FATALS(RunActionStdoutNone(Substitute( + "local_replica delete --fs_wal_dir=$0 --fs_data_dirs=$0 " + "--clean_unsafe $1", tserver_dir, tablet_ids_csv_str))); + + ASSERT_OK(ts->Start()); + ASSERT_OK(ts->WaitStarted()); + { + vector<scoped_refptr<TabletReplica>> tablet_replicas; + ts->server()->tablet_manager()->GetTabletReplicas(&tablet_replicas); + ASSERT_EQ(0, tablet_replicas.size()); + } +} + // Test 'kudu local_replica delete' tool for tombstoning the tablet. TEST_F(ToolTest, TestLocalReplicaTombstoneDelete) { NO_FATALS(StartMiniCluster()); diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc index 19feffe..4663ea8 100644 --- a/src/kudu/tools/tool_action_common.cc +++ b/src/kudu/tools/tool_action_common.cc @@ -194,6 +194,9 @@ const char* const kDestMasterAddressesArgDesc = "Either comma-separated list of const char* const kTableNameArg = "table_name"; const char* const kTabletIdArg = "tablet_id"; const char* const kTabletIdArgDesc = "Tablet Identifier"; +const char* const kTabletIdsCsvArg = "tablet_ids"; +const char* const kTabletIdsCsvArgDesc = + "Comma-separated list of Tablet Identifiers"; namespace { diff --git a/src/kudu/tools/tool_action_common.h b/src/kudu/tools/tool_action_common.h index 5631212..9b7a762 100644 --- a/src/kudu/tools/tool_action_common.h +++ b/src/kudu/tools/tool_action_common.h @@ -66,6 +66,8 @@ extern const char* const kDestMasterAddressesArgDesc; extern const char* const kTableNameArg; extern const char* const kTabletIdArg; extern const char* const kTabletIdArgDesc; +extern const char* const kTabletIdsCsvArg; +extern const char* const kTabletIdsCsvArgDesc; // Utility methods used by multiple actions across different modes. diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc index 31b3118..8673a2a 100644 --- a/src/kudu/tools/tool_action_local_replica.cc +++ b/src/kudu/tools/tool_action_local_replica.cc @@ -61,6 +61,7 @@ #include "kudu/gutil/strings/human_readable.h" #include "kudu/gutil/strings/join.h" #include "kudu/gutil/strings/numbers.h" +#include "kudu/gutil/strings/split.h" #include "kudu/gutil/strings/stringpiece.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/gutil/strings/util.h" @@ -371,11 +372,9 @@ Status CopyFromRemote(const RunnerContext& context) { return client.Finish(); } -Status DeleteLocalReplica(const RunnerContext& context) { - const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg); - FsManager fs_manager(Env::Default(), FsManagerOpts()); - RETURN_NOT_OK(fs_manager.Open()); - scoped_refptr<ConsensusMetadataManager> cmeta_manager(new ConsensusMetadataManager(&fs_manager)); +Status DeleteLocalReplica(const string& tablet_id, + FsManager* fs_manager, + const scoped_refptr<ConsensusMetadataManager>& cmeta_manager) { boost::optional<OpId> last_logged_opid = boost::none; TabletDataState state = TabletDataState::TABLET_DATA_DELETED; if (!FLAGS_clean_unsafe) { @@ -384,24 +383,54 @@ Status DeleteLocalReplica(const RunnerContext& context) { // the log, it's not an error. But if we receive any other error, // indicate the user to delete with --clean_unsafe flag. OpId opid; - Status s = FindLastLoggedOpId(&fs_manager, tablet_id, &opid); + Status s = FindLastLoggedOpId(fs_manager, tablet_id, &opid); if (s.ok()) { last_logged_opid = opid; } else if (s.IsNotFound()) { LOG(INFO) << "Could not find any replicated OpId from WAL, " - << "but proceeding with tablet tombstone: " << s.ToString(); + "but proceeding with tablet tombstone: " << s.ToString(); } else { - LOG(ERROR) << "Error attempting to find last replicated OpId from WAL: " << s.ToString(); - LOG(ERROR) << "Cannot delete (tombstone) the tablet, use --clean_unsafe to delete" - << " the tablet permanently from this server."; + LOG(ERROR) << "Error attempting to find last replicated OpId from WAL: " + << s.ToString(); + LOG(ERROR) << "Cannot delete (tombstone) the tablet replica, " + "use --clean_unsafe to delete the replica permanently " + "from this server"; return s; } } // Force the specified tablet on this node to be in 'state'. scoped_refptr<TabletMetadata> meta; - RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, tablet_id, &meta)); - RETURN_NOT_OK(TSTabletManager::DeleteTabletData(meta, cmeta_manager, state, last_logged_opid)); + RETURN_NOT_OK(TabletMetadata::Load(fs_manager, tablet_id, &meta)); + return TSTabletManager::DeleteTabletData( + meta, cmeta_manager, state, last_logged_opid); +} + +Status DeleteLocalReplicas(const RunnerContext& context) { + const string& tablet_ids_str = FindOrDie(context.required_args, kTabletIdsCsvArg); + vector<string> tablet_ids = strings::Split(tablet_ids_str, ",", strings::SkipEmpty()); + if (tablet_ids.empty()) { + return Status::InvalidArgument("no tablet identifiers provided"); + } + const auto orig_count = tablet_ids.size(); + std::sort(tablet_ids.begin(), tablet_ids.end()); + tablet_ids.erase(std::unique(tablet_ids.begin(), tablet_ids.end()), + tablet_ids.end()); + const auto uniq_count = tablet_ids.size(); + if (orig_count != uniq_count) { + LOG(INFO) << Substitute("removed $0 duplicate tablet identifiers", + orig_count - uniq_count); + } + LOG(INFO) << Substitute("deleting $0 tablets replicas", uniq_count); + + FsManager fs_manager(Env::Default(), {}); + RETURN_NOT_OK(fs_manager.Open()); + scoped_refptr<ConsensusMetadataManager> cmeta_manager( + new ConsensusMetadataManager(&fs_manager)); + for (const auto& tablet_id : tablet_ids) { + RETURN_NOT_OK(DeleteLocalReplica(tablet_id, &fs_manager, cmeta_manager)); + } + return Status::OK(); } @@ -894,10 +923,10 @@ unique_ptr<Mode> BuildLocalReplicaMode() { .Build(); unique_ptr<Action> delete_local_replica = - ActionBuilder("delete", &DeleteLocalReplica) - .Description("Delete a tablet replica from the local filesystem. " - "By default, leaves a tombstone record.") - .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc }) + ActionBuilder("delete", &DeleteLocalReplicas) + .Description("Delete tablet replicas from the local filesystem. " + "By default, leaves a tombstone record upon replica removal.") + .AddRequiredParameter({ kTabletIdsCsvArg, kTabletIdsCsvArgDesc }) .AddOptionalParameter("fs_data_dirs") .AddOptionalParameter("fs_metadata_dir") .AddOptionalParameter("fs_wal_dir")
