Repository: kudu Updated Branches: refs/heads/master 6626e109d -> b81d5569a
fs: defer failure from metadata load to bootstrap when data dir is missing An upcoming patch adds a CLI tool action to remove data directories. When a data dir is removed, all tablets with data on it will fail. Today that failure manifests as a FindOrDie in DataDirGroup::FromPB; we need to make that a little bit more graceful. This patch modifies the DataDirGroup FromPB/CopyToPB methods to return a failure when a data dir is missing. It further changes TabletMetadata to treat such failures as non-fatal, and adds checks to TabletBootstrap so that the failures manifest there instead. No tests in this patch because: 1. Andrew has already merged tablet-level tests for failed disks, and 2. The CLI tool patch adds coverage at the itest-level. Change-Id: I1e8d5697c2bb08287cce11fbdab6fb8d6e37d1ad Reviewed-on: http://gerrit.cloudera.org:8080/8376 Reviewed-by: Todd Lipcon <[email protected]> Reviewed-by: Andrew Wong <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b81d5569 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b81d5569 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b81d5569 Branch: refs/heads/master Commit: b81d5569a0fa9ee4d787b93703f210c2e2f8d93d Parents: 6626e10 Author: Adar Dembo <[email protected]> Authored: Tue Oct 24 18:18:39 2017 -0700 Committer: Adar Dembo <[email protected]> Committed: Sun Jan 21 20:05:22 2018 +0000 ---------------------------------------------------------------------- src/kudu/fs/block_manager-stress-test.cc | 2 +- src/kudu/fs/block_manager-test.cc | 10 +-- src/kudu/fs/data_dirs-test.cc | 11 ++-- src/kudu/fs/data_dirs.cc | 88 +++++++++++++++++++++++---- src/kudu/fs/data_dirs.h | 58 +++++++++--------- src/kudu/fs/log_block_manager-test.cc | 2 +- src/kudu/tablet/tablet_bootstrap.cc | 13 +++- src/kudu/tablet/tablet_metadata.cc | 11 ++-- src/kudu/tserver/tablet_copy_client.cc | 4 +- src/kudu/tserver/ts_tablet_manager.cc | 8 --- 10 files changed, 137 insertions(+), 70 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/block_manager-stress-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/block_manager-stress-test.cc b/src/kudu/fs/block_manager-stress-test.cc index 6ae4a2c..f5bedf3 100644 --- a/src/kudu/fs/block_manager-stress-test.cc +++ b/src/kudu/fs/block_manager-stress-test.cc @@ -150,7 +150,7 @@ class BlockManagerStressTest : public KuduTest { bm_.reset(CreateBlockManager()); bm_->Open(nullptr); dd_manager_->CreateDataDirGroup(test_tablet_name_); - CHECK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); + CHECK_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); } virtual void TearDown() override { http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/block_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/block_manager-test.cc b/src/kudu/fs/block_manager-test.cc index 46c8fec..e17a61e 100644 --- a/src/kudu/fs/block_manager-test.cc +++ b/src/kudu/fs/block_manager-test.cc @@ -119,9 +119,9 @@ class BlockManagerTest : public KuduTest { virtual void SetUp() override { // Pass in a report to prevent the block manager from logging unnecessarily. FsReport report; - CHECK_OK(bm_->Open(&report)); - CHECK_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); - CHECK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); + ASSERT_OK(bm_->Open(&report)); + ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); } void DistributeBlocksAcrossDirs(int num_dirs, int num_blocks_per_dir) { @@ -231,7 +231,7 @@ void BlockManagerTest<LogBlockManager>::SetUp() { ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); // Store the DataDirGroupPB for tests that reopen the block manager. - CHECK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); } template <> @@ -486,7 +486,7 @@ TYPED_TEST(BlockManagerTest, CreateBlocksInDataDirs) { DataDirGroupPB test_group_pb; // Check that the in-memory DataDirGroup did not change. - ASSERT_TRUE(this->dd_manager_->GetDataDirGroupPB( + ASSERT_OK(this->dd_manager_->GetDataDirGroupPB( this->test_tablet_name_, &test_group_pb)); ASSERT_TRUE(MessageDifferencer::Equals(test_group_pb, this->test_group_pb_)); } http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/data_dirs-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/data_dirs-test.cc b/src/kudu/fs/data_dirs-test.cc index e5ea7e2..449780a 100644 --- a/src/kudu/fs/data_dirs-test.cc +++ b/src/kudu/fs/data_dirs-test.cc @@ -116,7 +116,7 @@ TEST_F(DataDirsTest, TestCreateGroup) { DataDirGroupPB orig_pb; ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); - ASSERT_TRUE(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &orig_pb)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &orig_pb)); // Ensure that the DataDirManager will not create a group for a tablet that // it already knows about. @@ -125,7 +125,7 @@ TEST_F(DataDirsTest, TestCreateGroup) { ASSERT_STR_CONTAINS(s.ToString(), "Tried to create directory group for tablet " "but one is already registered"); DataDirGroupPB pb; - ASSERT_TRUE(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &pb)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &pb)); // Verify that the data directory is unchanged after failing to create an // existing tablet. @@ -153,7 +153,7 @@ TEST_F(DataDirsTest, TestLoadFromPB) { // Create a PB, delete the group, then load the group from the PB. DataDirGroupPB orig_pb; ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); - ASSERT_TRUE(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &orig_pb)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &orig_pb)); dd_manager_->DeleteDataDirGroup(test_tablet_name_); ASSERT_OK(dd_manager_->LoadDataDirGroupFromPB(test_tablet_name_, orig_pb)); @@ -171,8 +171,7 @@ TEST_F(DataDirsTest, TestLoadFromPB) { // knows about the tablet. Status s = dd_manager_->LoadDataDirGroupFromPB(test_tablet_name_, orig_pb); ASSERT_TRUE(s.IsAlreadyPresent()) << s.ToString(); - ASSERT_STR_CONTAINS(s.ToString(), "Tried to load directory group for tablet but " - "one is already registered"); + ASSERT_STR_CONTAINS(s.ToString(), "tried to load directory group for tablet"); } TEST_F(DataDirsTest, TestDeleteDataDirGroup) { @@ -237,7 +236,7 @@ TEST_F(DataDirsTest, TestFailedDirNotAddedToGroup) { entity_->FindOrNull(METRIC_data_dirs_failed).get())->value()); ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); DataDirGroupPB pb; - ASSERT_TRUE(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &pb)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &pb)); ASSERT_EQ(kNumDirs - 1, pb.uuids_size()); // Check that all uuid_indices are valid and are not in the failed directory http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/data_dirs.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc index 32ee476..46cae9c 100644 --- a/src/kudu/fs/data_dirs.cc +++ b/src/kudu/fs/data_dirs.cc @@ -38,6 +38,7 @@ #include "kudu/fs/block_manager.h" #include "kudu/fs/block_manager_util.h" +#include "kudu/fs/fs.pb.h" #include "kudu/gutil/bind.h" #include "kudu/gutil/gscoped_ptr.h" #include "kudu/gutil/integral_types.h" @@ -187,6 +188,10 @@ void DeleteTmpFilesRecursively(Env* env, const string& path) { } // anonymous namespace +//////////////////////////////////////////////////////////// +// DataDirMetrics +//////////////////////////////////////////////////////////// + #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0)) DataDirMetrics::DataDirMetrics(const scoped_refptr<MetricEntity>& entity) : GINIT(data_dirs_failed), @@ -194,6 +199,10 @@ DataDirMetrics::DataDirMetrics(const scoped_refptr<MetricEntity>& entity) } #undef GINIT +//////////////////////////////////////////////////////////// +// DataDir +//////////////////////////////////////////////////////////// + DataDir::DataDir(Env* env, DataDirMetrics* metrics, DataDirFsType fs_type, @@ -280,10 +289,61 @@ Status DataDir::RefreshIsFull(RefreshMode mode) { return Status::OK(); } +//////////////////////////////////////////////////////////// +// DataDirGroup +//////////////////////////////////////////////////////////// + +DataDirGroup::DataDirGroup() {} + +DataDirGroup::DataDirGroup(vector<int> uuid_indices) + : uuid_indices_(std::move(uuid_indices)) {} + +Status DataDirGroup::LoadFromPB(const UuidIndexByUuidMap& uuid_idx_by_uuid, + const DataDirGroupPB& pb) { + vector<int> uuid_indices; + for (const auto& uuid : pb.uuids()) { + int uuid_idx; + if (!FindCopy(uuid_idx_by_uuid, uuid, &uuid_idx)) { + return Status::NotFound(Substitute( + "could not find data dir with uuid $0", uuid)); + } + uuid_indices.emplace_back(uuid_idx); + } + + uuid_indices_ = std::move(uuid_indices); + return Status::OK(); +} + +Status DataDirGroup::CopyToPB(const UuidByUuidIndexMap& uuid_by_uuid_idx, + DataDirGroupPB* pb) const { + DCHECK(pb); + DataDirGroupPB group; + for (auto uuid_idx : uuid_indices_) { + string uuid; + if (!FindCopy(uuid_by_uuid_idx, uuid_idx, &uuid)) { + return Status::NotFound(Substitute( + "could not find data dir with uuid index $0", uuid_idx)); + } + group.mutable_uuids()->Add(std::move(uuid)); + } + + *pb = std::move(group); + return Status::OK(); +} + +//////////////////////////////////////////////////////////// +// DataDirManagerOptions +//////////////////////////////////////////////////////////// + DataDirManagerOptions::DataDirManagerOptions() - : block_manager_type(FLAGS_block_manager), - read_only(false), - update_on_disk(false) {} + : block_manager_type(FLAGS_block_manager), + read_only(false), + update_on_disk(false) { +} + +//////////////////////////////////////////////////////////// +// DataDirManager +//////////////////////////////////////////////////////////// vector<string> DataDirManager::GetRootNames(const CanonicalizedRootsList& root_list) { vector<string> roots; @@ -782,13 +842,16 @@ Status DataDirManager::Open() { Status DataDirManager::LoadDataDirGroupFromPB(const std::string& tablet_id, const DataDirGroupPB& pb) { std::lock_guard<percpu_rwlock> lock(dir_group_lock_); - DataDirGroup group_from_pb = DataDirGroup::FromPB(pb, idx_by_uuid_); + DataDirGroup group_from_pb; + RETURN_NOT_OK_PREPEND(group_from_pb.LoadFromPB(idx_by_uuid_, pb), Substitute( + "could not load data dir group for tablet $0", tablet_id)); DataDirGroup* other = InsertOrReturnExisting(&group_by_tablet_map_, tablet_id, group_from_pb); if (other != nullptr) { - return Status::AlreadyPresent("Tried to load directory group for tablet but one is already " - "registered", tablet_id); + return Status::AlreadyPresent(Substitute( + "tried to load directory group for tablet $0 but one is already registered", + tablet_id)); } for (int uuid_idx : group_from_pb.uuid_indices()) { InsertOrDie(&FindOrDie(tablets_by_uuid_idx_map_, uuid_idx), tablet_id); @@ -919,15 +982,16 @@ void DataDirManager::DeleteDataDirGroup(const std::string& tablet_id) { group_by_tablet_map_.erase(tablet_id); } -bool DataDirManager::GetDataDirGroupPB(const std::string& tablet_id, - DataDirGroupPB* pb) const { +Status DataDirManager::GetDataDirGroupPB(const string& tablet_id, + DataDirGroupPB* pb) const { shared_lock<rw_spinlock> lock(dir_group_lock_.get_lock()); const DataDirGroup* group = FindOrNull(group_by_tablet_map_, tablet_id); - if (group != nullptr) { - group->CopyToPB(uuid_by_idx_, pb); - return true; + if (group == nullptr) { + return Status::NotFound(Substitute( + "could not find data dir group for tablet $0", tablet_id)); } - return false; + RETURN_NOT_OK(group->CopyToPB(uuid_by_idx_, pb)); + return Status::OK(); } void DataDirManager::GetDirsForGroupUnlocked(int target_size, http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/data_dirs.h ---------------------------------------------------------------------- diff --git a/src/kudu/fs/data_dirs.h b/src/kudu/fs/data_dirs.h index d886f91..8dcf371 100644 --- a/src/kudu/fs/data_dirs.h +++ b/src/kudu/fs/data_dirs.h @@ -26,14 +26,11 @@ #include <utility> #include <vector> -#include <glog/logging.h> #include <gtest/gtest_prod.h> -#include "kudu/fs/fs.pb.h" #include "kudu/gutil/callback.h" -#include "kudu/gutil/ref_counted.h" #include "kudu/gutil/macros.h" -#include "kudu/gutil/map-util.h" +#include "kudu/gutil/ref_counted.h" #include "kudu/util/locks.h" #include "kudu/util/metrics.h" #include "kudu/util/monotime.h" @@ -41,6 +38,8 @@ #include "kudu/util/status.h" namespace kudu { + +class DataDirGroupPB; class Env; class ThreadPool; @@ -77,33 +76,29 @@ namespace internal { // The same directory may appear in multiple DataDirGroups. class DataDirGroup { public: - explicit DataDirGroup(std::vector<int> uuid_indices) - : uuid_indices_(std::move(uuid_indices)) {} - - static DataDirGroup FromPB(const DataDirGroupPB& pb, - const UuidIndexByUuidMap& uuid_idx_by_uuid) { - std::vector<int> uuid_indices; - for (const std::string& uuid : pb.uuids()) { - uuid_indices.push_back(FindOrDie(uuid_idx_by_uuid, uuid)); - } - return DataDirGroup(std::move(uuid_indices)); - } + DataDirGroup(); - void CopyToPB(const UuidByUuidIndexMap& uuid_by_uuid_idx, - DataDirGroupPB* pb) const { - DCHECK(pb); - DataDirGroupPB group; - for (int uuid_idx : uuid_indices_) { - group.add_uuids(FindOrDie(uuid_by_uuid_idx, uuid_idx)); - } - pb->Swap(&group); - } + explicit DataDirGroup(std::vector<int> uuid_indices); + + // Reloads the DataDirGroup with UUID indices for the UUIDs in 'pb' by + // looking them up in 'uuid_idx_by_uuid'. + // + // Returns an error if a uuid cannot be found. + Status LoadFromPB(const UuidIndexByUuidMap& uuid_idx_by_uuid, + const DataDirGroupPB& pb); + + // Writes this group's UUIDs to 'pb', looking them up via index in + // 'uuid_by_uuid_idx'. + // + // Returns an error if an index cannot be found. + Status CopyToPB(const UuidByUuidIndexMap& uuid_by_uuid_idx, + DataDirGroupPB* pb) const; const std::vector<int>& uuid_indices() const { return uuid_indices_; } private: // UUID indices corresponding to the data directories within the group. - const std::vector<int> uuid_indices_; + std::vector<int> uuid_indices_; }; } // namespace internal @@ -290,10 +285,17 @@ class DataDirManager { // Deserializes a DataDirGroupPB and associates the resulting DataDirGroup // with a tablet_id. // - // Results in an error if the tablet already exists. + // Returns an error if the tablet already exists or if a data dir in the + // group is missing. Status LoadDataDirGroupFromPB(const std::string& tablet_id, const DataDirGroupPB& pb); + // Serializes the DataDirGroupPB associated with the given tablet_id. + // + // Returns an error if the tablet was not already registered or if a data dir + // is missing. + Status GetDataDirGroupPB(const std::string& tablet_id, DataDirGroupPB* pb) const; + // Creates a new data dir group for the specified tablet. Adds data // directories to this new group until the limit specified by // fs_target_data_dirs_per_tablet, or until there is no more space. @@ -313,10 +315,6 @@ class DataDirManager { // and data dir to tablet set are cleared of all references to the tablet. void DeleteDataDirGroup(const std::string& tablet_id); - // Serializes the DataDirGroupPB associated with the given tablet_id. Returns - // false if none exist. - bool GetDataDirGroupPB(const std::string& tablet_id, DataDirGroupPB* pb) const; - // Returns a random directory from the specfied option's data dir group. If // there is no room in the group, returns an error. Status GetNextDataDir(const CreateBlockOptions& opts, DataDir** dir); http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/fs/log_block_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/log_block_manager-test.cc b/src/kudu/fs/log_block_manager-test.cc index eca2ccc..460d9d4 100644 --- a/src/kudu/fs/log_block_manager-test.cc +++ b/src/kudu/fs/log_block_manager-test.cc @@ -116,7 +116,7 @@ class LogBlockManagerTest : public KuduTest { FsReport report; ASSERT_OK(bm_->Open(&report)); ASSERT_OK(dd_manager_->CreateDataDirGroup(test_tablet_name_)); - ASSERT_TRUE(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); + ASSERT_OK(dd_manager_->GetDataDirGroupPB(test_tablet_name_, &test_group_pb_)); } protected: http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/tablet/tablet_bootstrap.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_bootstrap.cc b/src/kudu/tablet/tablet_bootstrap.cc index 371a6f0..759b1b3 100644 --- a/src/kudu/tablet/tablet_bootstrap.cc +++ b/src/kudu/tablet/tablet_bootstrap.cc @@ -49,6 +49,8 @@ #include "kudu/consensus/opid.pb.h" #include "kudu/consensus/opid_util.h" #include "kudu/consensus/raft_consensus.h" +#include "kudu/fs/data_dirs.h" +#include "kudu/fs/fs.pb.h" #include "kudu/fs/fs_manager.h" #include "kudu/gutil/bind.h" #include "kudu/gutil/gscoped_ptr.h" @@ -88,7 +90,6 @@ #include "kudu/util/pb_util.h" #include "kudu/util/stopwatch.h" - DECLARE_int32(group_commit_queue_size_bytes); DEFINE_bool(skip_remove_old_recovery_dir, false, @@ -552,6 +553,16 @@ Status TabletBootstrap::RunBootstrap(shared_ptr<Tablet>* rebuilt_tablet, VLOG_WITH_PREFIX(1) << "Tablet Metadata: " << SecureDebugString(super_block); } + + // Ensure the tablet's data dirs are present and healthy before it is opened. + DataDirGroupPB data_dir_group; + RETURN_NOT_OK_PREPEND( + tablet_meta_->fs_manager()->dd_manager()->GetDataDirGroupPB(tablet_id, &data_dir_group), + "error retrieving tablet data dir group (one or more data dirs may have been removed)"); + if (tablet_meta_->fs_manager()->dd_manager()->IsTabletInFailedDir(tablet_id)) { + return Status::IOError("some tablet data is in a failed directory"); + } + RETURN_NOT_OK(flushed_stores_.InitFrom(*tablet_meta_.get())); bool has_blocks; http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/tablet/tablet_metadata.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc index 92eb8f8..641c925 100644 --- a/src/kudu/tablet/tablet_metadata.cc +++ b/src/kudu/tablet/tablet_metadata.cc @@ -217,7 +217,7 @@ Status TabletMetadata::DeleteTabletData(TabletDataState delete_type, // Keep a copy of the old data dir group in case of flush failure. DataDirGroupPB pb; - bool old_group_exists = fs_manager_->dd_manager()->GetDataDirGroupPB(tablet_id_, &pb); + bool old_group_exists = fs_manager_->dd_manager()->GetDataDirGroupPB(tablet_id_, &pb).ok(); // Remove the tablet's data dir group tracked by the DataDirManager. fs_manager_->dd_manager()->DeleteDataDirGroup(tablet_id_); @@ -420,8 +420,11 @@ Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) fs_manager()->block_manager()->NotifyBlockId(max_block_id); if (superblock.has_data_dir_group()) { - RETURN_NOT_OK_PREPEND(fs_manager_->dd_manager()->LoadDataDirGroupFromPB( - tablet_id_, superblock.data_dir_group()), "Failed to load DataDirGroup from superblock"); + // An error loading the data dir group is non-fatal, it just means the + // tablet will fail to bootstrap later. + WARN_NOT_OK(fs_manager_->dd_manager()->LoadDataDirGroupFromPB( + tablet_id_, superblock.data_dir_group()), + "failed to load DataDirGroup from superblock"); } else if (tablet_data_state_ == TABLET_DATA_READY) { // If the superblock does not contain a DataDirGroup, this server has // likely been upgraded from before 1.5.0. Create a new DataDirGroup for @@ -675,7 +678,7 @@ Status TabletMetadata::ToSuperBlockUnlocked(TabletSuperBlockPB* super_block, // Serialize the tablet's DataDirGroupPB if one exists. One may not exist if // this is called during a tablet deletion. DataDirGroupPB group_pb; - if (fs_manager_->dd_manager()->GetDataDirGroupPB(tablet_id_, &group_pb)) { + if (fs_manager_->dd_manager()->GetDataDirGroupPB(tablet_id_, &group_pb).ok()) { pb.mutable_data_dir_group()->Swap(&group_pb); } http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/tserver/tablet_copy_client.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tserver/tablet_copy_client.cc b/src/kudu/tserver/tablet_copy_client.cc index 4e16e7d..ef88b0e 100644 --- a/src/kudu/tserver/tablet_copy_client.cc +++ b/src/kudu/tserver/tablet_copy_client.cc @@ -350,8 +350,8 @@ Status TabletCopyClient::Start(const HostPort& copy_source_addr, superblock_->tombstone_last_logged_opid(), &meta_)); } - CHECK(fs_manager_->dd_manager()->GetDataDirGroupPB(tablet_id_, - superblock_->mutable_data_dir_group())); + CHECK_OK(fs_manager_->dd_manager()->GetDataDirGroupPB( + tablet_id_, superblock_->mutable_data_dir_group())); // Create the ConsensusMetadata before returning from Start() so that it's // possible to vote while we are copying the replica for the first time. http://git-wip-us.apache.org/repos/asf/kudu/blob/b81d5569/src/kudu/tserver/ts_tablet_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc index a3c2887..0c8d8c4 100644 --- a/src/kudu/tserver/ts_tablet_manager.cc +++ b/src/kudu/tserver/ts_tablet_manager.cc @@ -941,14 +941,6 @@ void TSTabletManager::OpenTablet(const scoped_refptr<TabletReplica>& replica, return; } - // If the tablet is in a failed directory, don't bother bootstrapping. - if (fs_manager_->dd_manager()->IsTabletInFailedDir(tablet_id)) { - LOG(ERROR) << LogPrefix(tablet_id) << "aborting tablet bootstrap: tablet " - "has data in a failed directory"; - s = Status::IOError("Tablet data is in a failed directory"); - return; - } - consensus::ConsensusBootstrapInfo bootstrap_info; LOG_TIMING_PREFIX(INFO, LogPrefix(tablet_id), "bootstrapping tablet") { // Disable tracing for the bootstrap, since this would result in
