This is an automated email from the ASF dual-hosted git repository. abukor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 13a66ea9b088eec1de74249b738cc74333eefc4a Author: Attila Bukor <[email protected]> AuthorDate: Mon Nov 6 10:44:48 2023 +0100 [tools] KUDU-3337 Add unsafe_create_cmeta tool We've seen some cases where a power outage on XFS led to empty cmeta files, causing some tablets to fail to start (KUDU-2195). There is a flag to force fsync, but it's disabled by default except for XFS. Fortunately, it's possible to reconstruct what a cmeta should look like based on the information found in ksck (peers) and WAL dumps (term and config index). Still, even when this information is available, the only way to actually create a cmeta file was to copy an existing cmeta file and run "kudu pbc edit" on it, which is very error-prone and hard to automate. This commit introduces a new unsafe_create_cmeta tool under local_replica (registered in this change as the "local_replica cmeta unsafe_recreate" action), which creates a new cmeta file based on the term, config index and peers as provided in CLI arguments. I manually tested this tool by using it to recover a tablet with three empty cmeta files. Change-Id: I136cc5b5797420a9ca9156f37c3e281da0c265d7 Reviewed-on: http://gerrit.cloudera.org:8080/18029 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <[email protected]> --- src/kudu/tools/kudu-tool-test.cc | 102 +++++++++++++++++++++++++++- src/kudu/tools/tool_action_local_replica.cc | 70 +++++++++++++++++++ 2 files changed, 171 insertions(+), 1 deletion(-) diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 3aea9dcdc..a0a5e1184 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-#include <limits.h> #include <sys/stat.h> #include <algorithm> +#include <climits> #include <cstdint> #include <cstdio> #include <cstdlib> @@ -64,6 +64,7 @@ #include "kudu/common/wire_protocol-test-util.h" #include "kudu/common/wire_protocol.h" #include "kudu/consensus/consensus.pb.h" +#include "kudu/consensus/consensus.proxy.h" #include "kudu/consensus/log.h" #include "kudu/consensus/log_util.h" #include "kudu/consensus/opid.pb.h" @@ -1421,6 +1422,7 @@ TEST_F(ToolTest, TestModeHelp) { } { const vector<string> kLocalReplicaCMetaRegexes = { + "unsafe_recreate.*Rewrite the consensus metadata", "print_replica_uuids.*Print all tablet replica peer UUIDs", "rewrite_raft_config.*Rewrite a tablet replica", "set_term.*Bump the current term", @@ -4985,6 +4987,36 @@ TEST_F(ToolTest, TestLocalReplicaCMetaOps) { tablet_ids[i], ts_uuids[0])); } } + + constexpr const char* const tablet_id = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + constexpr const char* const create_cmeta_cmd = + "local_replica cmeta unsafe_recreate $0 $1 10 20 $2:$3"; + + // Test creating cmeta for a non-existing tablet. + { + NO_FATALS(RunActionStdoutNone(Substitute( + create_cmeta_cmd, + flags, tablet_id, ts_uuids[0], ts_host_port))); + } + + // Test that attempting to create cmeta for an existing tablet fails. + { + string stdout, stderr; + Status s = RunTool(Substitute(create_cmeta_cmd, + flags, tablet_id, ts_uuids[0], ts_host_port), + &stdout, &stderr); + ASSERT_FALSE(s.ok()); + ASSERT_EQ("", stdout); + ASSERT_STR_CONTAINS(stderr, "already exists"); + } + + // Test that creating cmeta for an existing tablet succeeds with "--force" + // option. 
+ { + NO_FATALS(RunActionStdoutNone(Substitute( + create_cmeta_cmd, + flags + " --force", tablet_id, ts_uuids[0], ts_host_port))); + } } TEST_F(ToolTest, TestServerSetFlag) { @@ -8457,6 +8489,74 @@ TEST_F(ToolTest, TestCheckFSWithNonDefaultMetadataDir) { SCOPED_TRACE(stdout); } +TEST_F(ToolTest, TestRecreateCMeta) { + SKIP_IF_SLOW_NOT_ALLOWED(); + constexpr int kNumTservers = 3; + constexpr int kNumTablets = 1; + constexpr int kNumRows = 1000; + const MonoDelta kTimeout = MonoDelta::FromSeconds(30); + ExternalMiniClusterOptions opts; + opts.num_tablet_servers = kNumTservers; + NO_FATALS(StartExternalMiniCluster(std::move(opts))); + + TestWorkload workload(cluster_.get()); + workload.set_num_tablets(kNumTablets); + workload.set_num_replicas(kNumTservers); + workload.Setup(); + + workload.Start(); + while (workload.rows_inserted() < kNumRows) { + SleepFor(MonoDelta::FromMilliseconds(10)); + } + workload.StopAndJoin(); + + vector<string> tablet_ids; + TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()]; + ASSERT_OK(ListRunningTabletIds(ts, kTimeout, &tablet_ids)); + const string tablet_id = tablet_ids[0]; + + // Get opid and term. + consensus::GetLastOpIdRequestPB req; + consensus::GetLastOpIdResponsePB resp; + req.set_tablet_id(tablet_id); + req.set_dest_uuid(cluster_->tablet_server(0)->uuid()); + RpcController rpc; + rpc.set_timeout(kTimeout); + ts->consensus_proxy->GetLastOpId(req, &resp, &rpc); + const int opid = resp.opid().index(); + const int term = resp.opid().term(); + + // Get server UUIDs and hostports. 
+ map<string, HostPort> servers; + for (auto i = 0; i < cluster_->num_tablet_servers(); i++) { + const auto ts = cluster_->tablet_server(i); + servers.emplace(ts->uuid(), ts->bound_rpc_hostport()); + } + + const string servers_string = JoinMapped(servers, + [](const pair<string, HostPort> server) { + return Substitute("$0:$1:$2", server.first, server.second.host(), server.second.port()); + }, " "); + + // Shut down the server and delete the cmeta file. + cluster_->tablet_server(0)->Shutdown(); + const auto wal_dir = cluster_->tablet_server(0)->wal_dir(); + Substitute("$0/consensus-meta/$1", wal_dir, tablet_id); + Env::Default()->DeleteFile(Substitute("$0/consensus-meta/$1", wal_dir, tablet_id)); + + // Set up and run the cmeta unsafe_recreate command. + const auto flags = Substitute("--fs_wal_dir=$0 --fs_data_dirs=$1", + cluster_->tablet_server(0)->wal_dir(), + cluster_->tablet_server(0)->data_dir()); + const auto cmd = Substitute("local_replica cmeta unsafe_recreate $0 $1 $2 $3 $4", + flags, tablet_id, term, opid, servers_string); + NO_FATALS(RunActionStdoutNone(cmd)); + + // Verify the cluster. + ASSERT_OK(cluster_->tablet_server(0)->Restart()); + NO_FATALS(ClusterVerifier(cluster_.get()).CheckCluster()); +} + TEST_F(ToolTest, TestReplaceTablet) { SKIP_IF_SLOW_NOT_ALLOWED(); constexpr int kNumTservers = 3; diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc index 122822619..3ca93755d 100644 --- a/src/kudu/tools/tool_action_local_replica.cc +++ b/src/kudu/tools/tool_action_local_replica.cc @@ -157,6 +157,7 @@ DEFINE_string(dst_fs_metadata_dir, "", "metadata directory if any exists. 
If none exists, --dst_fs_wal_dir " "will be used as the metadata directory.");; +DECLARE_bool(force); DECLARE_int32(num_threads); DECLARE_bool(tablet_copy_support_download_superblock_in_batch); DECLARE_int32(tablet_copy_download_threads_nums_per_session); @@ -674,6 +675,61 @@ Status DeleteRowsets(const RunnerContext& context) { return Status::OK(); } +Status UnsafeRecreateCmeta(const RunnerContext& context) { + const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg); + int64_t term; + int64_t opid_index; + try { + term = stoi(FindOrDie(context.required_args, "term")); + opid_index = stoi(FindOrDie(context.required_args, "index")); + } catch (...) { + return Status::InvalidArgument("term and index must be provided and must be numerical"); + } + + RaftConfigPB config; + config.set_opid_index(opid_index); + config.set_obsolete_local(false); + + set<string> uuids; + set<string> host_ports; + // Parse peer arguments. + for (const auto& arg : context.variadic_args) { + RaftPeerPB* peer = config.add_peers(); + peer->set_member_type(consensus::RaftPeerPB_MemberType_VOTER); + string uuid; + HostPort host_port; + RETURN_NOT_OK(ParsePeerString(arg, &uuid, &host_port)); + if (!uuids.emplace(uuid).second) { + return Status::InvalidArgument("Duplicate UUID: " + uuid); + } + if (!host_ports.emplace(host_port.ToString()).second) { + return Status::InvalidArgument("Duplicate RPC address: " + host_port.ToString()); + } + peer->set_permanent_uuid(uuid); + HostPortPB* hostPort = peer->mutable_last_known_addr(); + hostPort->set_host(host_port.host()); + hostPort->set_port(host_port.port()); + } + + // Load file system. + Env* env = Env::Default(); + FsManagerOpts fs_opts; + fs_opts.skip_block_manager = true; + FsManager fs_manager(env, std::move(fs_opts)); + RETURN_NOT_OK(fs_manager.Open()); + + // We need a scoped_refptr to avoid a check failure on RefCountedThreadSafe + // object deleted without calling Release(). 
+ scoped_refptr<ConsensusMetadataManager> manager(new ConsensusMetadataManager(&fs_manager)); + + if (FLAGS_force && manager->Load(tablet_id).ok()) { + RETURN_NOT_OK(manager->Delete(tablet_id)); + } + + // Write the cmeta file. + return manager->Create(tablet_id, config, term); +} + Status CopyFromRemote(const RunnerContext& context) { // Parse the tablet ID and source arguments. const string& tablet_ids_str = FindOrDie(context.required_args, kTabletIdsCsvArg); @@ -1356,6 +1412,19 @@ unique_ptr<Mode> BuildLocalReplicaMode() { .AddOptionalParameter("fs_wal_dir") .Build(); + unique_ptr<Action> unsafe_recreate = + ActionBuilder("unsafe_recreate", &UnsafeRecreateCmeta) + .Description("Rewrite the consensus metadata based on the provided arguments") + .AddRequiredParameter({kTabletIdArg, kTabletIdArgDesc}) + .AddRequiredParameter({"term", "Raft term"}) + .AddRequiredParameter({"index", "OpId index"}) + .AddRequiredVariadicParameter({kRaftPeersArg, kRaftPeersArgDesc}) + .AddOptionalParameter("fs_data_dirs") + .AddOptionalParameter("fs_metadata_dir") + .AddOptionalParameter("fs_wal_dir") + .AddOptionalParameter("force") + .Build(); + unique_ptr<Mode> cmeta = ModeBuilder("cmeta") .Description("Operate on a local tablet replica's consensus " @@ -1363,6 +1432,7 @@ unique_ptr<Mode> BuildLocalReplicaMode() { .AddAction(std::move(print_replica_uuids)) .AddAction(std::move(rewrite_raft_config)) .AddAction(std::move(set_term)) + .AddAction(std::move(unsafe_recreate)) .Build(); unique_ptr<Mode> tmeta =
