Repository: kudu Updated Branches: refs/heads/master a1bfd7bac -> a72c28e69
KUDU-1769: fs check action with rudimentary data block GC

This commit introduces a new "fs check" action to the CLI tool. For the time
being its capabilities are slim:
- Detecting missing blocks.
- Detecting orphaned blocks, optionally "repairing" (i.e. deleting) them.

I'm sure it'll be augmented to perform more consistency checks and repairs in
the future.

Why build this in the CLI tool and not in the block manager? A few reasons:
1. This "poor man" approach to GCing orphaned data blocks is extremely
   heavy-weight. Doing it at server startup is probably a bad idea.
2. Performance aside, finding a good time at server startup to compare the
   block lists will require additional refactoring and reordering.
3. If we change our minds, we can always move it later.

I do expect that the block manager will take on some check or repair duties,
but they'll be relegated to startup time, and will be much smaller in scope
(e.g. the LBM could repunch holes that previously failed to punch).

Change-Id: I82da8e234c338cd5a7540a22cebfa0a4958388ec
Reviewed-on: http://gerrit.cloudera.org:8080/6361
Tested-by: Adar Dembo <[email protected]>
Reviewed-by: Mike Percy <[email protected]>
Reviewed-by: Dinesh Bhat <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/a72c28e6
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/a72c28e6
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/a72c28e6

Branch: refs/heads/master
Commit: a72c28e699763cfeabadc4575b792e8d6cc1b191
Parents: a1bfd7b
Author: Adar Dembo <[email protected]>
Authored: Sat Mar 11 02:31:29 2017 -0800
Committer: Adar Dembo <[email protected]>
Committed: Fri Mar 17 18:43:12 2017 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet_metadata.cc | 26 +++--
 src/kudu/tablet/tablet_metadata.h | 6 +-
 src/kudu/tools/kudu-tool-test.cc | 109 ++++++++++++++++++++
 src/kudu/tools/tool_action_fs.cc | 102 
+++++++++++++++++- src/kudu/tserver/tablet_copy_source_session.cc | 4 +- 5 files changed, 233 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/a72c28e6/src/kudu/tablet/tablet_metadata.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc index c71b9e3..501cb7e 100644 --- a/src/kudu/tablet/tablet_metadata.cc +++ b/src/kudu/tablet/tablet_metadata.cc @@ -129,25 +129,37 @@ Status TabletMetadata::LoadOrCreate(FsManager* fs_manager, } } -void TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& superblock, - std::vector<BlockIdPB>* block_ids) { +vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& superblock) { + vector<BlockIdPB> block_ids; for (const RowSetDataPB& rowset : superblock.rowsets()) { for (const ColumnDataPB& column : rowset.columns()) { - block_ids->push_back(column.block()); + block_ids.push_back(column.block()); } for (const DeltaDataPB& redo : rowset.redo_deltas()) { - block_ids->push_back(redo.block()); + block_ids.push_back(redo.block()); } for (const DeltaDataPB& undo : rowset.undo_deltas()) { - block_ids->push_back(undo.block()); + block_ids.push_back(undo.block()); } if (rowset.has_bloom_block()) { - block_ids->push_back(rowset.bloom_block()); + block_ids.push_back(rowset.bloom_block()); } if (rowset.has_adhoc_index_block()) { - block_ids->push_back(rowset.adhoc_index_block()); + block_ids.push_back(rowset.adhoc_index_block()); } } + return block_ids; +} + +vector<BlockId> TabletMetadata::CollectBlockIds() { + vector<BlockId> block_ids; + for (const auto& r : rowsets_) { + vector<BlockId> rowset_block_ids = r->GetAllBlocks(); + block_ids.insert(block_ids.begin(), + rowset_block_ids.begin(), + rowset_block_ids.end()); + } + return block_ids; } Status TabletMetadata::DeleteTabletData(TabletDataState delete_type, 
http://git-wip-us.apache.org/repos/asf/kudu/blob/a72c28e6/src/kudu/tablet/tablet_metadata.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h index 7a03a2a..d295d51 100644 --- a/src/kudu/tablet/tablet_metadata.h +++ b/src/kudu/tablet/tablet_metadata.h @@ -92,8 +92,10 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> { const TabletDataState& initial_tablet_data_state, scoped_refptr<TabletMetadata>* metadata); - static void CollectBlockIdPBs(const TabletSuperBlockPB& superblock, - std::vector<BlockIdPB>* block_ids); + static std::vector<BlockIdPB> CollectBlockIdPBs( + const TabletSuperBlockPB& superblock); + + std::vector<BlockId> CollectBlockIds(); const std::string& tablet_id() const { DCHECK_NE(state_, kNotLoadedYet); http://git-wip-us.apache.org/repos/asf/kudu/blob/a72c28e6/src/kudu/tools/kudu-tool-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 9fd85ee..e7052a3 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -41,6 +41,7 @@ #include "kudu/consensus/opid.pb.h" #include "kudu/consensus/opid_util.h" #include "kudu/consensus/ref_counted_replicate.h" +#include "kudu/fs/block_id.h" #include "kudu/fs/block_manager.h" #include "kudu/fs/fs_manager.h" #include "kudu/gutil/gscoped_ptr.h" @@ -217,6 +218,26 @@ class ToolTest : public KuduTest { } } + void RunFsCheck(const string& arg_str, + int expected_num_blocks, + int expected_num_missing, + int expected_num_orphaned) { + string stdout; + string stderr; + Status s = RunTool(arg_str, &stdout, &stderr, nullptr, nullptr); + SCOPED_TRACE(stdout); + SCOPED_TRACE(stderr); + if (expected_num_missing) { + ASSERT_TRUE(s.IsRuntimeError()); + ASSERT_STR_CONTAINS(stderr, "Corruption"); + } else { + ASSERT_TRUE(s.ok()); + } + ASSERT_STR_CONTAINS(stdout, 
Substitute("$0 blocks", expected_num_blocks)); + ASSERT_STR_CONTAINS(stdout, Substitute("$0 missing", expected_num_missing)); + ASSERT_STR_CONTAINS(stdout, Substitute("$0 orphaned", expected_num_orphaned)); + } + protected: void RunLoadgen(int num_tservers = 1, const vector<string>& tool_args = {}, @@ -279,6 +300,7 @@ TEST_F(ToolTest, TestTopLevelHelp) { TEST_F(ToolTest, TestModeHelp) { { const vector<string> kFsModeRegexes = { + "check.*Kudu filesystem for inconsistencies", "format.*new Kudu filesystem", "dump.*Dump a Kudu filesystem" }; @@ -418,6 +440,93 @@ TEST_F(ToolTest, TestActionHelp) { Status::InvalidArgument("too many arguments: 'extra_arg'"))); } +TEST_F(ToolTest, TestFsCheck) { + const string kTestDir = GetTestPath("test"); + const string kTabletId = "test-tablet"; + const Schema kSchema(GetSimpleTestSchema()); + const Schema kSchemaWithIds(SchemaBuilder(kSchema).Build()); + + // Create a local replica, flush some rows a few times, and collect all + // of the created block IDs. + vector<BlockId> block_ids; + { + TabletHarness::Options opts(kTestDir); + opts.tablet_id = kTabletId; + TabletHarness harness(kSchemaWithIds, opts); + ASSERT_OK(harness.Create(true)); + ASSERT_OK(harness.Open()); + LocalTabletWriter writer(harness.tablet().get(), &kSchema); + KuduPartialRow row(&kSchemaWithIds); + + for (int num_flushes = 0; num_flushes < 10; num_flushes++) { + for (int i = 0; i < 10; i++) { + ASSERT_OK(row.SetInt32(0, num_flushes * i)); + ASSERT_OK(row.SetInt32(1, num_flushes * i * 10)); + ASSERT_OK(row.SetStringCopy(2, "HelloWorld")); + writer.Insert(row); + } + harness.tablet()->Flush(); + } + block_ids = harness.tablet()->metadata()->CollectBlockIds(); + harness.tablet()->Shutdown(); + } + + // Check the filesystem; all the blocks should be accounted for, and there + // should be no blocks missing or orphaned. + NO_FATALS(RunFsCheck(Substitute("fs check --fs_wal_dir=$0", kTestDir), + block_ids.size(), 0, 0)); + + // Delete half of the blocks. 
Upon the next check we can only find half, and + // the other half are deemed missing. + { + FsManager fs(env_, kTestDir); + ASSERT_OK(fs.Open()); + for (int i = 0; i < block_ids.size(); i += 2) { + ASSERT_OK(fs.DeleteBlock(block_ids[i])); + } + } + NO_FATALS(RunFsCheck(Substitute("fs check --fs_wal_dir=$0", kTestDir), + block_ids.size() / 2, block_ids.size() / 2, 0)); + + // Delete the tablet superblock. The next check finds half of the blocks, + // though without the superblock they're all considered to be orphaned. + // + // Here we check twice to show that if --repair isn't provided, there should + // be no effect. + { + FsManager fs(env_, kTestDir); + ASSERT_OK(fs.Open()); + ASSERT_OK(env_->DeleteFile(fs.GetTabletMetadataPath(kTabletId))); + } + for (int i = 0; i < 2; i++) { + NO_FATALS(RunFsCheck(Substitute("fs check --fs_wal_dir=$0", kTestDir), + block_ids.size() / 2, 0, block_ids.size() / 2)); + } + + // Repair the filesystem. The remaining half of all blocks were found, deemed + // to be orphaned, and deleted. The next check shows no remaining blocks. 
+ NO_FATALS(RunFsCheck(Substitute("fs check --fs_wal_dir=$0 --repair", kTestDir), + block_ids.size() / 2, 0, block_ids.size() / 2)); + NO_FATALS(RunFsCheck(Substitute("fs check --fs_wal_dir=$0", kTestDir), + 0, 0, 0)); +} + +TEST_F(ToolTest, TestFsCheckLiveServer) { + NO_FATALS(StartExternalMiniCluster()); + string master_data_dir = cluster_->GetDataPath("master-0"); + string args = Substitute("fs check --fs_wal_dir $0", master_data_dir); + NO_FATALS(RunFsCheck(args, 0, 0, 0)); + args += " --repair"; + string stdout; + string stderr; + Status s = RunTool(args, &stdout, &stderr, nullptr, nullptr); + SCOPED_TRACE(stdout); + SCOPED_TRACE(stderr); + ASSERT_TRUE(s.IsRuntimeError()); + ASSERT_TRUE(stdout.empty()); + ASSERT_STR_CONTAINS(stderr, "Could not lock"); +} + TEST_F(ToolTest, TestFsFormat) { const string kTestDir = GetTestPath("test"); NO_FATALS(RunActionStdoutNone(Substitute("fs format --fs_wal_dir=$0", kTestDir))); http://git-wip-us.apache.org/repos/asf/kudu/blob/a72c28e6/src/kudu/tools/tool_action_fs.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc index 6245b87..59a4c21 100644 --- a/src/kudu/tools/tool_action_fs.cc +++ b/src/kudu/tools/tool_action_fs.cc @@ -17,18 +17,26 @@ #include "kudu/tools/tool_action.h" +#include <algorithm> #include <iostream> +#include <iterator> #include <memory> #include <string> +#include <unordered_map> +#include <utility> +#include <vector> #include <boost/optional/optional.hpp> #include <gflags/gflags.h> #include "kudu/cfile/cfile_reader.h" #include "kudu/cfile/cfile_util.h" +#include "kudu/fs/block_id.h" #include "kudu/fs/fs_manager.h" +#include "kudu/gutil/ref_counted.h" #include "kudu/gutil/strings/numbers.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/tablet/tablet_metadata.h" #include "kudu/tools/tool_action_common.h" #include "kudu/util/pb_util.h" #include "kudu/util/status.h" @@ -39,6 +47,10 @@ 
DEFINE_bool(print_rows, true, DEFINE_string(uuid, "", "The uuid to use in the filesystem. " "If not provided, one is generated"); +DEFINE_bool(repair, false, + "Repair any inconsistencies in the filesystem."); +DEFINE_bool(verbose, false, + "Provide verbose output."); namespace kudu { namespace tools { @@ -50,11 +62,85 @@ using std::cout; using std::endl; using std::string; using std::unique_ptr; +using std::unordered_map; +using std::vector; using strings::Substitute; +using tablet::TabletMetadata; namespace { -Status Format(const RunnerContext& context) { +Status Check(const RunnerContext& /*context*/) { + FsManagerOpts opts; + opts.read_only = !FLAGS_repair; + FsManager fs_manager(Env::Default(), opts); + RETURN_NOT_OK(fs_manager.Open()); + + // Get the "live" block IDs (i.e. those referenced by a tablet). + vector<BlockId> live_block_ids; + unordered_map<BlockId, string, BlockIdHash, BlockIdEqual> live_block_id_to_tablet; + vector<string> tablet_ids; + RETURN_NOT_OK(fs_manager.ListTabletIds(&tablet_ids)); + for (const auto& t : tablet_ids) { + scoped_refptr<TabletMetadata> meta; + RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, t, &meta)); + vector<BlockId> tablet_live_block_ids = meta->CollectBlockIds(); + live_block_ids.insert(live_block_ids.end(), + tablet_live_block_ids.begin(), + tablet_live_block_ids.end()); + for (const auto& id : tablet_live_block_ids) { + InsertOrDie(&live_block_id_to_tablet, id, t); + } + } + + // Get all of the block IDs reachable by the block manager. + vector<BlockId> all_block_ids; + RETURN_NOT_OK(fs_manager.block_manager()->GetAllBlockIds(&all_block_ids)); + + std::sort(live_block_ids.begin(), live_block_ids.end(), BlockIdCompare()); + std::sort(all_block_ids.begin(), all_block_ids.end(), BlockIdCompare()); + + // Blocks found in the block manager but not in a tablet. They are orphaned + // and can be safely deleted. 
+ vector<BlockId> orphaned_block_ids; + std::set_difference(all_block_ids.begin(), all_block_ids.end(), + live_block_ids.begin(), live_block_ids.end(), + std::back_inserter(orphaned_block_ids), BlockIdCompare()); + + // Blocks found in a tablet but not in the block manager. They are missing + // and indicative of corruption in the associated tablet(s). + vector<BlockId> missing_block_ids; + std::set_difference(live_block_ids.begin(), live_block_ids.end(), + all_block_ids.begin(), all_block_ids.end(), + std::back_inserter(missing_block_ids), BlockIdCompare()); + + if (FLAGS_verbose) { + for (const auto& id : missing_block_ids) { + cout << Substitute("Block $0 (referenced by tablet $1) is missing\n", + id.ToString(), + FindOrDie(live_block_id_to_tablet, id)); + } + } + + for (const auto& id : orphaned_block_ids) { + if (FLAGS_verbose) { + cout << Substitute("Block $0 is not referenced by any tablets$1\n", + id.ToString(), FLAGS_repair ? " (deleting)" : ""); + } + if (FLAGS_repair) { + RETURN_NOT_OK(fs_manager.DeleteBlock(id)); + } + } + + cout << Substitute("Summary: $0 blocks total ($1 missing, $2 orphaned$3)\n", + all_block_ids.size(), + missing_block_ids.size(), + orphaned_block_ids.size(), + FLAGS_repair ? " and deleted" : ""); + return missing_block_ids.empty() ? 
Status::OK() : + Status::Corruption("Irreparable filesystem corruption detected"); +} + +Status Format(const RunnerContext& /*context*/) { FsManager fs_manager(Env::Default(), FsManagerOpts()); boost::optional<string> uuid; if (!FLAGS_uuid.empty()) { @@ -63,7 +149,7 @@ Status Format(const RunnerContext& context) { return fs_manager.CreateInitialFileSystemLayout(uuid); } -Status DumpUuid(const RunnerContext& context) { +Status DumpUuid(const RunnerContext& /*context*/) { FsManagerOpts opts; opts.read_only = true; FsManager fs_manager(Env::Default(), opts); @@ -108,7 +194,7 @@ Status DumpCFile(const RunnerContext& context) { return Status::OK(); } -Status DumpFsTree(const RunnerContext& context) { +Status DumpFsTree(const RunnerContext& /*context*/) { FsManagerOpts fs_opts; fs_opts.read_only = true; FsManager fs_manager(Env::Default(), fs_opts); @@ -154,6 +240,15 @@ static unique_ptr<Mode> BuildFsDumpMode() { } unique_ptr<Mode> BuildFsMode() { + unique_ptr<Action> check = + ActionBuilder("check", &Check) + .Description("Check a Kudu filesystem for inconsistencies") + .AddOptionalParameter("fs_wal_dir") + .AddOptionalParameter("fs_data_dirs") + .AddOptionalParameter("repair") + .AddOptionalParameter("verbose") + .Build(); + unique_ptr<Action> format = ActionBuilder("format", &Format) .Description("Format a new Kudu filesystem") @@ -165,6 +260,7 @@ unique_ptr<Mode> BuildFsMode() { return ModeBuilder("fs") .Description("Operate on a local Kudu filesystem") .AddMode(BuildFsDumpMode()) + .AddAction(std::move(check)) .AddAction(std::move(format)) .Build(); } http://git-wip-us.apache.org/repos/asf/kudu/blob/a72c28e6/src/kudu/tserver/tablet_copy_source_session.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tserver/tablet_copy_source_session.cc b/src/kudu/tserver/tablet_copy_source_session.cc index dc2f727..9976f39 100644 --- a/src/kudu/tserver/tablet_copy_source_session.cc +++ b/src/kudu/tserver/tablet_copy_source_session.cc 
@@ -88,8 +88,8 @@ Status TabletCopySourceSession::Init() { // Anchor the data blocks by opening them and adding them to the cache. // // All subsequent requests should reuse the opened blocks. - vector<BlockIdPB> data_blocks; - TabletMetadata::CollectBlockIdPBs(tablet_superblock_, &data_blocks); + vector<BlockIdPB> data_blocks = + TabletMetadata::CollectBlockIdPBs(tablet_superblock_); for (const BlockIdPB& block_id : data_blocks) { VLOG(1) << "Opening block " << SecureDebugString(block_id); RETURN_NOT_OK(OpenBlockUnlocked(BlockId::FromPB(block_id)));
