This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new 0f4b7b863 Added LevelDB compaction after replicated log truncation.
0f4b7b863 is described below
commit 0f4b7b86381d696273167516e01f3ca8b8130a95
Author: Ilya Pronin <[email protected]>
AuthorDate: Mon Jul 2 17:16:13 2018 -0700
Added LevelDB compaction after replicated log truncation.
The LevelDB compaction algorithm doesn't seem to work well with our key
usage pattern because background compaction is not triggered [1].
Because of that the replicated log storage grows over time until it
consumes all disk space on the partition. As a workaround we can
manually invoke leveldb::DB::CompactRange() every time a truncation
action is persisted and some stored keys are removed. This workaround
can be turned off by setting the environment variable
MESOS_LOG_AUTO_COMPACT_DISABLED=1.
[1] https://github.com/google/leveldb/issues/603
See MESOS-184 and https://github.com/google/leveldb/issues/603 for details.
---
src/log/leveldb.cpp | 47 +++++++++++++++++++++++++++++++++++++++--------
src/log/leveldb.hpp | 4 ++++
2 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/src/log/leveldb.cpp b/src/log/leveldb.cpp
index 7cb84bc1a..867b7a05e 100644
--- a/src/log/leveldb.cpp
+++ b/src/log/leveldb.cpp
@@ -30,6 +30,8 @@
#include <stout/strings.hpp>
#include <stout/unreachable.hpp>
+#include <stout/os/getenv.hpp>
+
#include "log/leveldb.hpp"
using std::string;
@@ -156,7 +158,13 @@ static string encode(uint64_t position, bool adjust = true)
LevelDBStorage::LevelDBStorage()
: db(nullptr), first(None())
{
- // Nothing to see here.
+ // We provide an escape hatch in case some users experience issues
+ // with auto-compaction. This is currently only available as an environment
+ // variable since it's easier than exposing a flag up through the JNI layer.
+ Option<string> disableAutoCompact =
os::getenv("MESOS_LOG_AUTO_COMPACT_DISABLED");
+ if (disableAutoCompact.getOrElse("0") == "1") {
+ autoCompact = false;
+ }
}
@@ -444,19 +452,26 @@ Try<Nothing> LevelDBStorage::persist(const Action& action)
// If we added any positions, attempt to delete them!
if (index > 0) {
- // We do this write asynchronously (e.g., using default options).
- leveldb::Status status = db->Write(leveldb::WriteOptions(), &batch);
-
+ leveldb::Status status = db->Write(options, &batch);
if (!status.ok()) {
LOG(WARNING) << "Ignoring leveldb batch delete failure: "
<< status.ToString();
} else {
- // Save the new first position!
- CHECK_LT(first.get(), truncateTo.get());
- first = truncateTo.get();
-
VLOG(1) << "Deleting ~" << index
<< " keys from leveldb took " << stopwatch.elapsed();
+
+ CHECK_LT(first.get(), truncateTo.get());
+
+ // LevelDB compaction algorithm doesn't seem to work well with
+ // our usage pattern because background compaction is not
+ // triggered. As a workaround we manually invoke compaction of
+ // the key range that we just removed. See MESOS-184 and
+ // https://github.com/google/leveldb/issues/603 for details.
+ if (autoCompact) {
+ compactRange(first.get(), truncateTo.get() - 1);
+ }
+
+ first = truncateTo.get();
}
}
}
@@ -497,6 +512,22 @@ Try<Action> LevelDBStorage::read(uint64_t position)
return record.action();
}
+
+void LevelDBStorage::compactRange(uint64_t first, uint64_t last)
+{
+ Stopwatch stopwatch;
+ stopwatch.start();
+
+ const string firstData = encode(first);
+ const string lastData = encode(last);
+ const leveldb::Slice firstSlice(firstData);
+ const leveldb::Slice lastSlice(lastData);
+ db->CompactRange(&firstSlice, &lastSlice);
+
+ VLOG(1) << "Compacting range " << first << "-" << last << " took "
+ << stopwatch.elapsed();
+}
+
} // namespace log {
} // namespace internal {
} // namespace mesos {
diff --git a/src/log/leveldb.hpp b/src/log/leveldb.hpp
index 1d5842a13..340d93ee8 100644
--- a/src/log/leveldb.hpp
+++ b/src/log/leveldb.hpp
@@ -42,10 +42,14 @@ public:
Try<Action> read(uint64_t position) override;
private:
+ void compactRange(uint64_t first, uint64_t last);
+
leveldb::DB* db;
// First position still in leveldb, used during truncation.
Option<uint64_t> first;
+
+ bool autoCompact = true;
};
} // namespace log {