KUDU-2153. Servers should not delete tmp files until after locking directories

This changes the order of FsManager startup to not try to clean tmp
files until after successfully locking the data directories. This
prevents potential issues such as:

- a tserver is already running on some host, and in the middle of
  consensus voting. Thus it has created a tmp file.
- someone accidentally attempts to start a tserver with the same set of
  data dirs. Prior to this patch, it would delete the tmp file before
  realizing that it could not successfully lock its data dirs and
  aborting.
- the original tserver would crash or otherwise get very confused
  because the tmp file it just wrote would be gone.

This patch relies on the locking on the block manager instance files to
provide exclusive access to some non-block-manager-related storage such
as the consensus meta, etc. That means that it's still possible for
someone to hit the above issue if they were to start servers with
disjoint sets of data dirs but with the same meta dir. However, the
patch is still a net improvement because the most likely scenario is
that the two servers are started with identical configurations.

This patch also removes the block_manager_lock_dirs flag which was
apparently unused. It was always marked as 'unsafe' so it's not a
compatibility issue to remove it without a deprecation period.

Change-Id: I3a3471c8ce00e77fa1712ea518f6ab281864a08d
Reviewed-on: http://gerrit.cloudera.org:8080/9596
Reviewed-by: Adar Dembo <a...@cloudera.com>
Tested-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d88e5772
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d88e5772
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d88e5772

Branch: refs/heads/master
Commit: d88e5772ee323c8a305250c7d0aa0b49f67475dc
Parents: 846a785
Author: Todd Lipcon <t...@apache.org>
Authored: Mon Mar 12 18:25:22 2018 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Tue Mar 13 23:40:05 2018 +0000

----------------------------------------------------------------------
 src/kudu/fs/block_manager.cc      |  5 -----
 src/kudu/fs/file_block_manager.cc |  1 -
 src/kudu/fs/fs_manager-test.cc    | 38 +++++++++++++++++++++++++++-------
 src/kudu/fs/fs_manager.cc         | 19 +++++++++--------
 src/kudu/fs/log_block_manager.cc  |  1 -
 src/kudu/util/env_posix.cc        |  9 ++++++++
 6 files changed, 49 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/fs/block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager.cc b/src/kudu/fs/block_manager.cc
index d3ce1c0..71a3075 100644
--- a/src/kudu/fs/block_manager.cc
+++ b/src/kudu/fs/block_manager.cc
@@ -49,11 +49,6 @@ DEFINE_string(block_manager_preflush_control, "finalize",
               "never be pre-flushed but still be flushed when closed.");
 TAG_FLAG(block_manager_preflush_control, experimental);
 
-DEFINE_bool(block_manager_lock_dirs, true,
-            "Lock the data block directories to prevent concurrent usage. "
-            "Note that read-only concurrent usage is still allowed.");
-TAG_FLAG(block_manager_lock_dirs, unsafe);
-
 DEFINE_int64(block_manager_max_open_files, -1,
              "Maximum number of open file descriptors to be used for data "
              "blocks. If -1, Kudu will use 40% of its resource limit as per "

http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/fs/file_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/file_block_manager.cc 
b/src/kudu/fs/file_block_manager.cc
index d5452f2..54ae7fa 100644
--- a/src/kudu/fs/file_block_manager.cc
+++ b/src/kudu/fs/file_block_manager.cc
@@ -65,7 +65,6 @@ using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 
-DECLARE_bool(block_manager_lock_dirs);
 DECLARE_bool(enable_data_block_fsync);
 DECLARE_string(block_manager_preflush_control);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/fs/fs_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/fs_manager-test.cc b/src/kudu/fs/fs_manager-test.cc
index e21fe2c..5c0b3a5 100644
--- a/src/kudu/fs/fs_manager-test.cc
+++ b/src/kudu/fs/fs_manager-test.cc
@@ -18,7 +18,6 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
-#include <cstddef>
 #include <cstdint>
 #include <iostream>
 #include <iterator>
@@ -66,6 +65,7 @@ DECLARE_bool(crash_on_eio);
 DECLARE_double(env_inject_eio);
 DECLARE_string(block_manager);
 DECLARE_string(env_inject_eio_globs);
+DECLARE_string(env_inject_lock_failure_globs);
 DECLARE_string(umask);
 
 namespace kudu {
@@ -344,7 +344,8 @@ TEST_F(FsManagerTestBase, TestIsolatedMetadataDir) {
 }
 
 Status CountTmpFiles(Env* env, const string& path, const vector<string>& 
children,
-                     unordered_set<string>* checked_dirs, size_t* count) {
+                     unordered_set<string>* checked_dirs, int* count) {
+  int n = 0;
   vector<string> sub_objects;
   for (const string& name : children) {
     if (name == "." || name == "..") continue;
@@ -357,22 +358,29 @@ Status CountTmpFiles(Env* env, const string& path, const 
vector<string>& childre
       if (!ContainsKey(*checked_dirs, sub_path)) {
         checked_dirs->insert(sub_path);
         RETURN_NOT_OK(env->GetChildren(sub_path, &sub_objects));
-        RETURN_NOT_OK(CountTmpFiles(env, sub_path, sub_objects, checked_dirs, 
count));
+        int subdir_count = 0;
+        RETURN_NOT_OK(CountTmpFiles(env, sub_path, sub_objects, checked_dirs, 
&subdir_count));
+        n += subdir_count;
       }
     } else if (name.find(kTmpInfix) != string::npos) {
-      (*count)++;
+      n++;
     }
   }
+  *count = n;
   return Status::OK();
 }
 
-Status CountTmpFiles(Env* env, const vector<string>& roots, size_t* count) {
+Status CountTmpFiles(Env* env, const vector<string>& roots, int* count) {
   unordered_set<string> checked_dirs;
+  int n = 0;
   for (const string& root : roots) {
     vector<string> children;
     RETURN_NOT_OK(env->GetChildren(root, &children));
-    RETURN_NOT_OK(CountTmpFiles(env, root, children, &checked_dirs, count));
+    int dir_count;
+    RETURN_NOT_OK(CountTmpFiles(env, root, children, &checked_dirs, 
&dir_count));
+    n += dir_count;
   }
+  *count = n;
   return Status::OK();
 }
 
@@ -465,11 +473,25 @@ TEST_F(FsManagerTestBase, TestTmpFilesCleanup) {
   lookup_dirs.emplace_back(fs_manager()->GetConsensusMetadataDir());
   lookup_dirs.emplace_back(fs_manager()->GetTabletMetadataDir());
 
-  size_t n_tmp_files = 0;
+  int n_tmp_files = 0;
   ASSERT_OK(CountTmpFiles(fs_manager()->env(), lookup_dirs, &n_tmp_files));
   ASSERT_EQ(6, n_tmp_files);
 
-  // Opening fs_manager should remove tmp files
+  // The FsManager should not delete any tmp files if it fails to acquire
+  // a lock on the data dir.
+  string bm_instance = JoinPathSegments(fs_manager()->GetDataRootDirs()[1],
+                                        "block_manager_instance");
+  {
+    google::FlagSaver saver;
+    FLAGS_env_inject_lock_failure_globs = bm_instance;
+    ReinitFsManagerWithPaths(wal_path, data_paths);
+    Status s = fs_manager()->Open();
+    ASSERT_STR_MATCHES(s.ToString(), "Could not lock.*");
+    ASSERT_OK(CountTmpFiles(fs_manager()->env(), lookup_dirs, &n_tmp_files));
+    ASSERT_EQ(6, n_tmp_files);
+  }
+
+  // Now start up without the injected lock failure, and ensure that tmp files 
are deleted.
   ReinitFsManagerWithPaths(wal_path, data_paths);
   ASSERT_OK(fs_manager()->Open());
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/fs/fs_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/fs_manager.cc b/src/kudu/fs/fs_manager.cc
index e7757f9..64f963f 100644
--- a/src/kudu/fs/fs_manager.cc
+++ b/src/kudu/fs/fs_manager.cc
@@ -387,15 +387,6 @@ Status FsManager::Open(FsReport* report) {
                           "unable to create missing filesystem roots");
   }
 
-  // Remove leftover temporary files from the WAL root and fix permissions.
-  //
-  // Temporary files in the data directory roots will be removed by the block
-  // manager.
-  if (!opts_.read_only) {
-    CleanTmpFiles();
-    CheckAndFixPermissions();
-  }
-
   // Open the directory manager if it has not been opened already.
   if (!dd_manager_) {
     DataDirManagerOptions dm_opts;
@@ -409,6 +400,14 @@ Status FsManager::Open(FsReport* report) {
     }
   }
 
+  // Only clean temporary files after the data dir manager successfully opened.
+  // This ensures that we were able to obtain the exclusive directory locks
+  // on the data directories before we start deleting files.
+  if (!opts_.read_only) {
+    CleanTmpFiles();
+    CheckAndFixPermissions();
+  }
+
   // Set an initial error handler to mark data directories as failed.
   error_manager_->SetErrorNotificationCb(ErrorHandlerType::DISK,
       Bind(&DataDirManager::MarkDataDirFailedByUuid, 
Unretained(dd_manager_.get())));
@@ -671,6 +670,8 @@ string FsManager::GetWalSegmentFileName(const string& 
tablet_id,
 
 void FsManager::CleanTmpFiles() {
   DCHECK(!opts_.read_only);
+  // Temporary files in the Block Manager directories are cleaned during
+  // Block Manager startup.
   for (const auto& s : { GetWalsRootDir(),
                          GetTabletMetadataDir(),
                          GetConsensusMetadataDir() }) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 2958358..4277c12 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -75,7 +75,6 @@
 #include "kudu/util/test_util_prod.h"
 #include "kudu/util/trace.h"
 
-DECLARE_bool(block_manager_lock_dirs);
 DECLARE_bool(enable_data_block_fsync);
 DECLARE_string(block_manager_preflush_control);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d88e5772/src/kudu/util/env_posix.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index fa4682b..2b35805 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -187,6 +187,12 @@ DEFINE_string(env_inject_eio_globs, "*",
               "I/O will fail. By default, all files may cause a failure.");
 TAG_FLAG(env_inject_eio_globs, hidden);
 
+DEFINE_string(env_inject_lock_failure_globs, "",
+              "Comma-separated list of glob patterns specifying files on which 
"
+              "attempts to obtain a file lock will fail. By default, no files "
+              "will fail.");
+TAG_FLAG(env_inject_lock_failure_globs, hidden);
+
 static __thread uint64_t thread_local_id;
 static Atomic64 cur_thread_local_id_;
 
@@ -1332,6 +1338,9 @@ class PosixEnv : public Env {
   virtual Status LockFile(const std::string& fname, FileLock** lock) OVERRIDE {
     TRACE_EVENT1("io", "PosixEnv::LockFile", "path", fname);
     MAYBE_RETURN_EIO(fname, IOError(Env::kInjectedFailureStatusMsg, EIO));
+    if (StringMatchesGlob(fname, FLAGS_env_inject_lock_failure_globs)) {
+      return IOError("lock " + fname, EAGAIN);
+    }
     ThreadRestrictions::AssertIOAllowed();
     *lock = nullptr;
     Status result;

Reply via email to