This is an automated email from the ASF dual-hosted git repository.

abukor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 13a66ea9b088eec1de74249b738cc74333eefc4a
Author: Attila Bukor <[email protected]>
AuthorDate: Mon Nov 6 10:44:48 2023 +0100

    [tools] KUDU-3337 Add unsafe_create_cmeta tool
    
    We've seen some cases when a power outage on XFS led to empty cmeta
    files, causing some tablets to fail to start (KUDU-2195). There is a
    flag to force fsync, but it's disabled by default except for XFS.
    
    Fortunately, it's possible to reconstruct what a cmeta should look like
    based on the information found in ksck (peers) and WAL dumps (term and
    config index). Still, even when this information is available, the only
    way to actually create a cmeta file was to copy an existing cmeta file
    and run "kudu pbc edit" on it, which is very error-prone and hard to
    automate.
    
    This commit introduces a new unsafe_create_cmeta tool under
    local_replica, which creates a new cmeta file based on the term, config
    index and peers as provided in CLI arguments.
    
    I manually tested this tool by using it to recover a tablet with three
    empty cmeta files.
    
    Change-Id: I136cc5b5797420a9ca9156f37c3e281da0c265d7
    Reviewed-on: http://gerrit.cloudera.org:8080/18029
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/tools/kudu-tool-test.cc            | 102 +++++++++++++++++++++++++++-
 src/kudu/tools/tool_action_local_replica.cc |  70 +++++++++++++++++++
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 3aea9dcdc..a0a5e1184 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <limits.h>
 #include <sys/stat.h>
 
 #include <algorithm>
+#include <climits>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
@@ -64,6 +64,7 @@
 #include "kudu/common/wire_protocol-test-util.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/consensus/consensus.pb.h"
+#include "kudu/consensus/consensus.proxy.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_util.h"
 #include "kudu/consensus/opid.pb.h"
@@ -1421,6 +1422,7 @@ TEST_F(ToolTest, TestModeHelp) {
   }
   {
     const vector<string> kLocalReplicaCMetaRegexes = {
+        "unsafe_recreate.*Rewrite the consensus metadata",
         "print_replica_uuids.*Print all tablet replica peer UUIDs",
         "rewrite_raft_config.*Rewrite a tablet replica",
         "set_term.*Bump the current term",
@@ -4985,6 +4987,36 @@ TEST_F(ToolTest, TestLocalReplicaCMetaOps) {
                                               tablet_ids[i], ts_uuids[0]));
     }
   }
+
+  constexpr const char* const tablet_id = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  constexpr const char* const create_cmeta_cmd =
+    "local_replica cmeta unsafe_recreate $0 $1 10 20 $2:$3";
+
+  // Test creating cmeta for a non-existing tablet.
+  {
+    NO_FATALS(RunActionStdoutNone(Substitute(
+            create_cmeta_cmd,
+            flags, tablet_id, ts_uuids[0], ts_host_port)));
+  }
+
+  // Test that attempting to create cmeta for an existing tablet fails.
+  {
+    string stdout, stderr;
+    Status s = RunTool(Substitute(create_cmeta_cmd,
+                                  flags, tablet_id, ts_uuids[0], ts_host_port),
+                       &stdout, &stderr);
+    ASSERT_FALSE(s.ok());
+    ASSERT_EQ("", stdout);
+    ASSERT_STR_CONTAINS(stderr, "already exists");
+  }
+
+  // Test that creating cmeta for an existing tablet succeeds with "--force"
+  // option.
+  {
+    NO_FATALS(RunActionStdoutNone(Substitute(
+            create_cmeta_cmd,
+            flags + " --force", tablet_id, ts_uuids[0], ts_host_port)));
+  }
 }
 
 TEST_F(ToolTest, TestServerSetFlag) {
@@ -8457,6 +8489,74 @@ TEST_F(ToolTest, TestCheckFSWithNonDefaultMetadataDir) {
   SCOPED_TRACE(stdout);
 }
 
+// End-to-end check of 'local_replica cmeta unsafe_recreate': write some rows
+// into a 3-replica tablet, record the last OpId reported by the first tablet
+// server, delete that server's cmeta file while it is shut down, recreate the
+// file with the tool and verify that the cluster is consistent after restart.
+TEST_F(ToolTest, TestRecreateCMeta) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+  constexpr int kNumTservers = 3;
+  constexpr int kNumTablets = 1;
+  constexpr int kNumRows = 1000;
+  const MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+  ExternalMiniClusterOptions opts;
+  opts.num_tablet_servers = kNumTservers;
+  NO_FATALS(StartExternalMiniCluster(std::move(opts)));
+
+  TestWorkload workload(cluster_.get());
+  workload.set_num_tablets(kNumTablets);
+  workload.set_num_replicas(kNumTservers);
+  workload.Setup();
+
+  workload.Start();
+  while (workload.rows_inserted() < kNumRows) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  // The tablet server whose cmeta file is going to be removed and recreated.
+  auto* const victim = cluster_->tablet_server(0);
+
+  vector<string> tablet_ids;
+  TServerDetails* ts = ts_map_[victim->uuid()];
+  ASSERT_OK(ListRunningTabletIds(ts, kTimeout, &tablet_ids));
+  // Guard the indexing below: fail the test instead of reading out of bounds.
+  ASSERT_FALSE(tablet_ids.empty());
+  const string tablet_id = tablet_ids[0];
+
+  // Get opid and term. The RPC status is checked so that a failed call is
+  // reported here rather than surfacing later as a bogus term/index.
+  consensus::GetLastOpIdRequestPB req;
+  consensus::GetLastOpIdResponsePB resp;
+  req.set_tablet_id(tablet_id);
+  req.set_dest_uuid(victim->uuid());
+  RpcController rpc;
+  rpc.set_timeout(kTimeout);
+  ASSERT_OK(ts->consensus_proxy->GetLastOpId(req, &resp, &rpc));
+  ASSERT_TRUE(resp.has_opid());
+  // Keep the values 64-bit wide instead of narrowing them to int.
+  const int64_t opid_index = resp.opid().index();
+  const int64_t term = resp.opid().term();
+
+  // Get server UUIDs and hostports.
+  map<string, HostPort> servers;
+  for (auto i = 0; i < cluster_->num_tablet_servers(); i++) {
+    const auto* const tserver = cluster_->tablet_server(i);
+    servers.emplace(tserver->uuid(), tserver->bound_rpc_hostport());
+  }
+
+  // Build the space-separated "uuid:host:port" peer arguments for the tool.
+  const string servers_string = JoinMapped(
+      servers,
+      [](const pair<const string, HostPort>& server) {
+        return Substitute("$0:$1:$2",
+                          server.first, server.second.host(), server.second.port());
+      },
+      " ");
+
+  // Shut down the server and delete the cmeta file.
+  victim->Shutdown();
+  const auto wal_dir = victim->wal_dir();
+  ASSERT_OK(Env::Default()->DeleteFile(
+      Substitute("$0/consensus-meta/$1", wal_dir, tablet_id)));
+
+  // Set up and run the cmeta unsafe_recreate command.
+  const auto flags = Substitute("--fs_wal_dir=$0 --fs_data_dirs=$1",
+                                wal_dir, victim->data_dir());
+  const auto cmd = Substitute("local_replica cmeta unsafe_recreate $0 $1 $2 $3 $4",
+                              flags, tablet_id, term, opid_index, servers_string);
+  NO_FATALS(RunActionStdoutNone(cmd));
+
+  // Verify the cluster.
+  ASSERT_OK(victim->Restart());
+  NO_FATALS(ClusterVerifier(cluster_.get()).CheckCluster());
+}
+
 TEST_F(ToolTest, TestReplaceTablet) {
   SKIP_IF_SLOW_NOT_ALLOWED();
   constexpr int kNumTservers = 3;
diff --git a/src/kudu/tools/tool_action_local_replica.cc 
b/src/kudu/tools/tool_action_local_replica.cc
index 122822619..3ca93755d 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -157,6 +157,7 @@ DEFINE_string(dst_fs_metadata_dir, "",
               "metadata directory if any exists. If none exists, 
--dst_fs_wal_dir "
               "will be used as the metadata directory.");;
 
+DECLARE_bool(force);
 DECLARE_int32(num_threads);
 DECLARE_bool(tablet_copy_support_download_superblock_in_batch);
 DECLARE_int32(tablet_copy_download_threads_nums_per_session);
@@ -674,6 +675,61 @@ Status DeleteRowsets(const RunnerContext& context) {
   return Status::OK();
 }
 
+// Creates a consensus metadata (cmeta) file for a tablet replica purely from
+// command-line arguments.
+//
+// Required arguments: the tablet id, "term" and "index" (stored as the Raft
+// config's opid_index). Variadic arguments: one peer string per replica, each
+// parsed by ParsePeerString(); every peer is recorded as a VOTER.
+//
+// Returns InvalidArgument if term/index are not valid integers or if a peer
+// UUID or RPC address is listed twice; otherwise propagates any error from
+// peer parsing, opening the file system, or deleting/creating the cmeta file.
+Status UnsafeRecreateCmeta(const RunnerContext& context) {
+  static constexpr const char* const kBadNumberMsg =
+      "term and index must be provided and must be numerical";
+
+  const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
+  const string& term_str = FindOrDie(context.required_args, "term");
+  const string& index_str = FindOrDie(context.required_args, "index");
+
+  // The targets are int64_t, so parse with the 64-bit stoll(): stoi() would
+  // throw for perfectly valid values that do not fit into an int.
+  int64_t term;
+  int64_t opid_index;
+  try {
+    size_t term_len = 0;
+    size_t index_len = 0;
+    term = std::stoll(term_str, &term_len);
+    opid_index = std::stoll(index_str, &index_len);
+    // Reject trailing garbage such as "10abc", which stoll() would otherwise
+    // silently accept as 10.
+    if (term_len != term_str.size() || index_len != index_str.size()) {
+      return Status::InvalidArgument(kBadNumberMsg);
+    }
+  } catch (...) {
+    // stoll() throws on non-numeric and out-of-range input.
+    return Status::InvalidArgument(kBadNumberMsg);
+  }
+
+  RaftConfigPB config;
+  config.set_opid_index(opid_index);
+  config.set_obsolete_local(false);
+
+  set<string> uuids;
+  set<string> host_ports;
+  // Parse peer arguments. Each argument is validated before a peer entry is
+  // added to the config.
+  for (const auto& arg : context.variadic_args) {
+    string uuid;
+    HostPort host_port;
+    RETURN_NOT_OK(ParsePeerString(arg, &uuid, &host_port));
+    if (!uuids.emplace(uuid).second) {
+      return Status::InvalidArgument("Duplicate UUID: " + uuid);
+    }
+    if (!host_ports.emplace(host_port.ToString()).second) {
+      return Status::InvalidArgument("Duplicate RPC address: " + host_port.ToString());
+    }
+
+    RaftPeerPB* peer = config.add_peers();
+    peer->set_member_type(consensus::RaftPeerPB_MemberType_VOTER);
+    peer->set_permanent_uuid(uuid);
+    HostPortPB* last_known_addr = peer->mutable_last_known_addr();
+    last_known_addr->set_host(host_port.host());
+    last_known_addr->set_port(host_port.port());
+  }
+
+  // Load file system.
+  Env* env = Env::Default();
+  FsManagerOpts fs_opts;
+  fs_opts.skip_block_manager = true;
+  FsManager fs_manager(env, std::move(fs_opts));
+  RETURN_NOT_OK(fs_manager.Open());
+
+  // We need a scoped_refptr to avoid a check failure on RefCountedThreadSafe
+  // object deleted without calling Release().
+  scoped_refptr<ConsensusMetadataManager> manager(new ConsensusMetadataManager(&fs_manager));
+
+  // With --force, an existing cmeta is removed first so that Create() below
+  // does not fail because the file is already there.
+  // NOTE(review): the old file is only deleted when Load() succeeds. If Load()
+  // fails on an empty or corrupt cmeta file, that file presumably stays on
+  // disk and Create() may still report that it already exists -- confirm
+  // against ConsensusMetadataManager.
+  if (FLAGS_force && manager->Load(tablet_id).ok()) {
+    RETURN_NOT_OK(manager->Delete(tablet_id));
+  }
+
+  // Write the cmeta file.
+  return manager->Create(tablet_id, config, term);
+}
+
 Status CopyFromRemote(const RunnerContext& context) {
   // Parse the tablet ID and source arguments.
   const string& tablet_ids_str = FindOrDie(context.required_args, 
kTabletIdsCsvArg);
@@ -1356,6 +1412,19 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
           .AddOptionalParameter("fs_wal_dir")
           .Build();
 
+  unique_ptr<Action> unsafe_recreate =
+      ActionBuilder("unsafe_recreate", &UnsafeRecreateCmeta)
+      .Description("Rewrite the consensus metadata based on the provided 
arguments")
+      .AddRequiredParameter({kTabletIdArg, kTabletIdArgDesc})
+      .AddRequiredParameter({"term", "Raft term"})
+      .AddRequiredParameter({"index", "OpId index"})
+      .AddRequiredVariadicParameter({kRaftPeersArg, kRaftPeersArgDesc})
+      .AddOptionalParameter("fs_data_dirs")
+      .AddOptionalParameter("fs_metadata_dir")
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("force")
+      .Build();
+
   unique_ptr<Mode> cmeta =
       ModeBuilder("cmeta")
       .Description("Operate on a local tablet replica's consensus "
@@ -1363,6 +1432,7 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddAction(std::move(print_replica_uuids))
       .AddAction(std::move(rewrite_raft_config))
       .AddAction(std::move(set_term))
+      .AddAction(std::move(unsafe_recreate))
       .Build();
 
   unique_ptr<Mode> tmeta =

Reply via email to