Repository: kudu Updated Branches: refs/heads/branch-1.7.x 9f4b70f89 -> 132da394d
KUDU-2153. Servers should not delete tmp files until after locking directories This changes the order of FsManager startup to not try to clean tmp files until after successfully locking the data directories. This prevents potential issues such as: - a tserver is already running on some host, and in the middle of consensus voting. Thus it has created a tmp file. - someone accidentally attempts to start a tserver with the same set of data dirs. Prior to this patch, it would delete the tmp file before realizing that it could not successfully lock its data dirs and aborting. - the original tserver would crash or otherwise get very confused because the tmp file it just wrote would be gone. This patch relies on the locking on the block manager instance files to provide exclusive access to some non-block-manager-related storage such as the consensus meta, etc. That means that it's still possible for someone to hit the above issue if they were to start servers with disjoint sets of data dirs but with the same meta dir. However, the patch is still a net improvement because the most likely scenario is that the two servers are started with identical configurations. This patch also removes the block_manager_lock_dirs flag which was apparently unused. It was always marked as 'unsafe' so it's not a compatibility issue to remove it without a deprecation period. Change-Id: I3a3471c8ce00e77fa1712ea518f6ab281864a08d Reviewed-on: http://gerrit.cloudera.org:8080/9596 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Todd Lipcon <[email protected]> (cherry picked from commit d88e5772ee323c8a305250c7d0aa0b49f67475dc) Reviewed-on: http://gerrit.cloudera.org:8080/9622 Reviewed-by: Grant Henke <[email protected]> Tested-by: Grant Henke <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e6415bc5 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e6415bc5 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e6415bc5 Branch: refs/heads/branch-1.7.x Commit: e6415bc5863c895639d0c472b003f46334c8053d Parents: 9f4b70f Author: Todd Lipcon <[email protected]> Authored: Mon Mar 12 18:25:22 2018 -0700 Committer: Grant Henke <[email protected]> Committed: Wed Mar 14 23:13:54 2018 +0000 ---------------------------------------------------------------------- src/kudu/fs/block_manager.cc | 5 ----- src/kudu/fs/file_block_manager.cc | 1 - src/kudu/fs/fs_manager-test.cc | 38 +++++++++++++++++++++++++++------- src/kudu/fs/fs_manager.cc | 19 +++++++++-------- src/kudu/fs/log_block_manager.cc | 1 - src/kudu/util/env_posix.cc | 9 ++++++++ 6 files changed, 49 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/fs/block_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/block_manager.cc b/src/kudu/fs/block_manager.cc index d3ce1c0..71a3075 100644 --- a/src/kudu/fs/block_manager.cc +++ b/src/kudu/fs/block_manager.cc @@ -49,11 +49,6 @@ DEFINE_string(block_manager_preflush_control, "finalize", "never be pre-flushed but still be flushed when closed."); TAG_FLAG(block_manager_preflush_control, experimental); -DEFINE_bool(block_manager_lock_dirs, true, - "Lock the data block directories to prevent concurrent usage. " - "Note that read-only concurrent usage is still allowed."); -TAG_FLAG(block_manager_lock_dirs, unsafe); - DEFINE_int64(block_manager_max_open_files, -1, "Maximum number of open file descriptors to be used for data " "blocks. If -1, Kudu will use 40% of its resource limit as per " http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/fs/file_block_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/file_block_manager.cc b/src/kudu/fs/file_block_manager.cc index d5452f2..54ae7fa 100644 --- a/src/kudu/fs/file_block_manager.cc +++ b/src/kudu/fs/file_block_manager.cc @@ -65,7 +65,6 @@ using std::unique_ptr; using std::vector; using strings::Substitute; -DECLARE_bool(block_manager_lock_dirs); DECLARE_bool(enable_data_block_fsync); DECLARE_string(block_manager_preflush_control); http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/fs/fs_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/fs_manager-test.cc b/src/kudu/fs/fs_manager-test.cc index e21fe2c..5c0b3a5 100644 --- a/src/kudu/fs/fs_manager-test.cc +++ b/src/kudu/fs/fs_manager-test.cc @@ -18,7 +18,6 @@ #include <sys/stat.h> #include <unistd.h> -#include <cstddef> #include <cstdint> #include <iostream> #include <iterator> @@ -66,6 +65,7 @@ DECLARE_bool(crash_on_eio); DECLARE_double(env_inject_eio); DECLARE_string(block_manager); DECLARE_string(env_inject_eio_globs); +DECLARE_string(env_inject_lock_failure_globs); DECLARE_string(umask); namespace kudu { @@ -344,7 +344,8 @@ TEST_F(FsManagerTestBase, TestIsolatedMetadataDir) { } Status CountTmpFiles(Env* env, const string& path, const vector<string>& children, - unordered_set<string>* checked_dirs, size_t* count) { + unordered_set<string>* checked_dirs, int* count) { + int n = 0; vector<string> sub_objects; for (const string& name : children) { if (name == "." || name == "..") continue; @@ -357,22 +358,29 @@ Status CountTmpFiles(Env* env, const string& path, const vector<string>& childre if (!ContainsKey(*checked_dirs, sub_path)) { checked_dirs->insert(sub_path); RETURN_NOT_OK(env->GetChildren(sub_path, &sub_objects)); - RETURN_NOT_OK(CountTmpFiles(env, sub_path, sub_objects, checked_dirs, count)); + int subdir_count = 0; + RETURN_NOT_OK(CountTmpFiles(env, sub_path, sub_objects, checked_dirs, &subdir_count)); + n += subdir_count; } } else if (name.find(kTmpInfix) != string::npos) { - (*count)++; + n++; } } + *count = n; return Status::OK(); } -Status CountTmpFiles(Env* env, const vector<string>& roots, size_t* count) { +Status CountTmpFiles(Env* env, const vector<string>& roots, int* count) { unordered_set<string> checked_dirs; + int n = 0; for (const string& root : roots) { vector<string> children; RETURN_NOT_OK(env->GetChildren(root, &children)); - RETURN_NOT_OK(CountTmpFiles(env, root, children, &checked_dirs, count)); + int dir_count; + RETURN_NOT_OK(CountTmpFiles(env, root, children, &checked_dirs, &dir_count)); + n += dir_count; } + *count = n; return Status::OK(); } @@ -465,11 +473,25 @@ TEST_F(FsManagerTestBase, TestTmpFilesCleanup) { lookup_dirs.emplace_back(fs_manager()->GetConsensusMetadataDir()); lookup_dirs.emplace_back(fs_manager()->GetTabletMetadataDir()); - size_t n_tmp_files = 0; + int n_tmp_files = 0; ASSERT_OK(CountTmpFiles(fs_manager()->env(), lookup_dirs, &n_tmp_files)); ASSERT_EQ(6, n_tmp_files); - // Opening fs_manager should remove tmp files + // The FsManager should not delete any tmp files if it fails to acquire + // a lock on the data dir. + string bm_instance = JoinPathSegments(fs_manager()->GetDataRootDirs()[1], + "block_manager_instance"); + { + google::FlagSaver saver; + FLAGS_env_inject_lock_failure_globs = bm_instance; + ReinitFsManagerWithPaths(wal_path, data_paths); + Status s = fs_manager()->Open(); + ASSERT_STR_MATCHES(s.ToString(), "Could not lock.*"); + ASSERT_OK(CountTmpFiles(fs_manager()->env(), lookup_dirs, &n_tmp_files)); + ASSERT_EQ(6, n_tmp_files); + } + + // Now start up without the injected lock failure, and ensure that tmp files are deleted. ReinitFsManagerWithPaths(wal_path, data_paths); ASSERT_OK(fs_manager()->Open()); http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/fs/fs_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/fs_manager.cc b/src/kudu/fs/fs_manager.cc index e7757f9..64f963f 100644 --- a/src/kudu/fs/fs_manager.cc +++ b/src/kudu/fs/fs_manager.cc @@ -387,15 +387,6 @@ Status FsManager::Open(FsReport* report) { "unable to create missing filesystem roots"); } - // Remove leftover temporary files from the WAL root and fix permissions. - // - // Temporary files in the data directory roots will be removed by the block - // manager. - if (!opts_.read_only) { - CleanTmpFiles(); - CheckAndFixPermissions(); - } - // Open the directory manager if it has not been opened already. if (!dd_manager_) { DataDirManagerOptions dm_opts; @@ -409,6 +400,14 @@ Status FsManager::Open(FsReport* report) { } } + // Only clean temporary files after the data dir manager successfully opened. + // This ensures that we were able to obtain the exclusive directory locks + // on the data directories before we start deleting files. + if (!opts_.read_only) { + CleanTmpFiles(); + CheckAndFixPermissions(); + } + // Set an initial error handler to mark data directories as failed. error_manager_->SetErrorNotificationCb(ErrorHandlerType::DISK, Bind(&DataDirManager::MarkDataDirFailedByUuid, Unretained(dd_manager_.get()))); @@ -671,6 +670,8 @@ string FsManager::GetWalSegmentFileName(const string& tablet_id, void FsManager::CleanTmpFiles() { DCHECK(!opts_.read_only); + // Temporary files in the Block Manager directories are cleaned during + // Block Manager startup. for (const auto& s : { GetWalsRootDir(), GetTabletMetadataDir(), GetConsensusMetadataDir() }) { http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/fs/log_block_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc index 2958358..4277c12 100644 --- a/src/kudu/fs/log_block_manager.cc +++ b/src/kudu/fs/log_block_manager.cc @@ -75,7 +75,6 @@ #include "kudu/util/test_util_prod.h" #include "kudu/util/trace.h" -DECLARE_bool(block_manager_lock_dirs); DECLARE_bool(enable_data_block_fsync); DECLARE_string(block_manager_preflush_control); http://git-wip-us.apache.org/repos/asf/kudu/blob/e6415bc5/src/kudu/util/env_posix.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc index fa4682b..2b35805 100644 --- a/src/kudu/util/env_posix.cc +++ b/src/kudu/util/env_posix.cc @@ -187,6 +187,12 @@ DEFINE_string(env_inject_eio_globs, "*", "I/O will fail. By default, all files may cause a failure."); TAG_FLAG(env_inject_eio_globs, hidden); +DEFINE_string(env_inject_lock_failure_globs, "", + "Comma-separated list of glob patterns specifying files on which " + "attempts to obtain a file lock will fail. By default, no files " + "will fail."); +TAG_FLAG(env_inject_lock_failure_globs, hidden); + static __thread uint64_t thread_local_id; static Atomic64 cur_thread_local_id_; @@ -1332,6 +1338,9 @@ class PosixEnv : public Env { virtual Status LockFile(const std::string& fname, FileLock** lock) OVERRIDE { TRACE_EVENT1("io", "PosixEnv::LockFile", "path", fname); MAYBE_RETURN_EIO(fname, IOError(Env::kInjectedFailureStatusMsg, EIO)); + if (StringMatchesGlob(fname, FLAGS_env_inject_lock_failure_globs)) { + return IOError("lock " + fname, EAGAIN); + } ThreadRestrictions::AssertIOAllowed(); *lock = nullptr; Status result;
