This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new 0f4b7b863 Added LevelDB compaction after replicated log truncation.
0f4b7b863 is described below

commit 0f4b7b86381d696273167516e01f3ca8b8130a95
Author: Ilya Pronin <[email protected]>
AuthorDate: Mon Jul 2 17:16:13 2018 -0700

    Added LevelDB compaction after replicated log truncation.
    
    LevelDB compaction algorithm doesn't seem to work well with our key
    usage pattern because the background compaction is not triggered [1].
    Because of that the replicated log storage grows over time until it
    consumes all disk space on the partition. As a workaround we can
    manually invoke leveldb::DB::CompactRange() every time a truncation
    action is persisted and some stored keys are removed. This workaround
    can be turned off by setting the MESOS_LOG_AUTO_COMPACT_DISABLED=1 environment
    variable.
    
    [1] https://github.com/google/leveldb/issues/603
    
    See MESOS-184 and https://github.com/google/leveldb/issues/603 for details.
---
 src/log/leveldb.cpp | 47 +++++++++++++++++++++++++++++++++++++++--------
 src/log/leveldb.hpp |  4 ++++
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/src/log/leveldb.cpp b/src/log/leveldb.cpp
index 7cb84bc1a..867b7a05e 100644
--- a/src/log/leveldb.cpp
+++ b/src/log/leveldb.cpp
@@ -30,6 +30,8 @@
 #include <stout/strings.hpp>
 #include <stout/unreachable.hpp>
 
+#include <stout/os/getenv.hpp>
+
 #include "log/leveldb.hpp"
 
 using std::string;
@@ -156,7 +158,13 @@ static string encode(uint64_t position, bool adjust = true)
 LevelDBStorage::LevelDBStorage()
   : db(nullptr), first(None())
 {
-  // Nothing to see here.
+  // We provide an escape hatch in case some users experience issues
+  // with auto-compaction. This is currently only available as an environment
+  // variable since it's easier than exposing a flag up through the JNI layer.
+  Option<string> disableAutoCompact = os::getenv("MESOS_LOG_AUTO_COMPACT_DISABLED");
+  if (disableAutoCompact.getOrElse("0") == "1") {
+    autoCompact = false;
+  }
 }
 
 
@@ -444,19 +452,26 @@ Try<Nothing> LevelDBStorage::persist(const Action& action)
 
     // If we added any positions, attempt to delete them!
     if (index > 0) {
-      // We do this write asynchronously (e.g., using default options).
-      leveldb::Status status = db->Write(leveldb::WriteOptions(), &batch);
-
+      leveldb::Status status = db->Write(options, &batch);
       if (!status.ok()) {
         LOG(WARNING) << "Ignoring leveldb batch delete failure: "
                      << status.ToString();
       } else {
-        // Save the new first position!
-        CHECK_LT(first.get(), truncateTo.get());
-        first = truncateTo.get();
-
         VLOG(1) << "Deleting ~" << index
                 << " keys from leveldb took " << stopwatch.elapsed();
+
+        CHECK_LT(first.get(), truncateTo.get());
+
+        // LevelDB compaction algorithm doesn't seem to work well with
+        // our usage pattern because background compaction is not
+        // triggered. As a workaround we manually invoke compaction of
+        // the key range that we just removed. See MESOS-184 and
+        // https://github.com/google/leveldb/issues/603 for details.
+        if (autoCompact) {
+          compactRange(first.get(), truncateTo.get() - 1);
+        }
+
+        first = truncateTo.get();
       }
     }
   }
@@ -497,6 +512,22 @@ Try<Action> LevelDBStorage::read(uint64_t position)
   return record.action();
 }
 
+
+void LevelDBStorage::compactRange(uint64_t first, uint64_t last)
+{
+  Stopwatch stopwatch;
+  stopwatch.start();
+
+  const string firstData = encode(first);
+  const string lastData = encode(last);
+  const leveldb::Slice firstSlice(firstData);
+  const leveldb::Slice lastSlice(lastData);
+  db->CompactRange(&firstSlice, &lastSlice);
+
+  VLOG(1) << "Compacting range " << first << "-" << last << " took "
+          << stopwatch.elapsed();
+}
+
 } // namespace log {
 } // namespace internal {
 } // namespace mesos {
diff --git a/src/log/leveldb.hpp b/src/log/leveldb.hpp
index 1d5842a13..340d93ee8 100644
--- a/src/log/leveldb.hpp
+++ b/src/log/leveldb.hpp
@@ -42,10 +42,14 @@ public:
   Try<Action> read(uint64_t position) override;
 
 private:
+  void compactRange(uint64_t first, uint64_t last);
+
   leveldb::DB* db;
 
   // First position still in leveldb, used during truncation.
   Option<uint64_t> first;
+
+  bool autoCompact = true;
 };
 
 } // namespace log {

Reply via email to