Repository: mesos Updated Branches: refs/heads/master a5cc9b435 -> cfa168f51
Allowed cgroups mem isolator to limit swap by setting memory.memsw.limit_in_bytes. Review: https://reviews.apache.org/r/24316 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cfa168f5 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cfa168f5 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cfa168f5 Branch: refs/heads/master Commit: cfa168f5136f05d01ebb5ffa3a42a118db14c43e Parents: a5cc9b4 Author: Anton Lindström <[email protected]> Authored: Sun Aug 10 16:26:46 2014 -0700 Committer: Jie Yu <[email protected]> Committed: Sun Aug 10 16:51:11 2014 -0700 ---------------------------------------------------------------------- src/linux/cgroups.cpp | 71 ++++++++- src/linux/cgroups.hpp | 17 +++ .../containerizer/isolators/cgroups/mem.cpp | 143 ++++++++++++++----- .../containerizer/isolators/cgroups/mem.hpp | 7 +- src/slave/flags.hpp | 8 ++ 5 files changed, 209 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/linux/cgroups.cpp ---------------------------------------------------------------------- diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp index 47be0ef..989e307 100644 --- a/src/linux/cgroups.cpp +++ b/src/linux/cgroups.cpp @@ -1963,7 +1963,76 @@ Try<Nothing> limit_in_bytes( const Bytes& limit) { return cgroups::write( - hierarchy, cgroup, "memory.limit_in_bytes", stringify(limit.bytes())); + hierarchy, + cgroup, + "memory.limit_in_bytes", + stringify(limit.bytes())); +} + + +Result<Bytes> memsw_limit_in_bytes( + const string& hierarchy, + const string& cgroup) +{ + Try<bool> exists = cgroups::exists( + hierarchy, cgroup, "memory.memsw.limit_in_bytes"); + + if (exists.isError()) { + return Error( + "Could not check for existence of 'memory.memsw.limit_in_bytes': " + + exists.error()); + } + + if (!exists.get()) { + return None(); + } + + Try<string> read = cgroups::read( + hierarchy, cgroup, "memory.memsw.limit_in_bytes"); + + if (read.isError()) { + return Error(read.error()); + } + + Try<Bytes> bytes = Bytes::parse(strings::trim(read.get()) + "B"); + + if (bytes.isError()) { + return Error(bytes.error()); + } + + return bytes.get(); +} + + +Try<bool> memsw_limit_in_bytes( + const string& hierarchy, + const string& cgroup, + const Bytes& limit) +{ + Try<bool> exists = cgroups::exists( + hierarchy, cgroup, "memory.memsw.limit_in_bytes"); + + if (exists.isError()) { + return Error( + "Could not check for existence of 'memory.memsw.limit_in_bytes': " + + exists.error()); + } + + if (!exists.get()) { + return false; + } + + Try<Nothing> write = cgroups::write( + hierarchy, + cgroup, + "memory.memsw.limit_in_bytes", + stringify(limit.bytes())); + + if (write.isError()) { + return Error(write.error()); + } + + return true; } http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/linux/cgroups.hpp ---------------------------------------------------------------------- diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp index 26dcb3d..abf31df 100644 --- a/src/linux/cgroups.hpp +++ b/src/linux/cgroups.hpp @@ -455,6 +455,23 @@ Try<Nothing> limit_in_bytes( const Bytes& limit); +// Returns the memory limit from memory.memsw.limit_in_bytes. Returns +// none if memory.memsw.limit_in_bytes is not supported (e.g., when +// swap is turned off). +Result<Bytes> memsw_limit_in_bytes( + const std::string& hierarchy, + const std::string& cgroup); + + +// Sets the memory limit using memory.memsw.limit_in_bytes. Returns +// false if memory.memsw.limit_in_bytes is not supported (e.g., when +// swap is turned off). +Try<bool> memsw_limit_in_bytes( + const std::string& hierarchy, + const std::string& cgroup, + const Bytes& limit); + + // Returns the soft memory limit from memory.soft_limit_in_bytes. Try<Bytes> soft_limit_in_bytes( const std::string& hierarchy, http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/containerizer/isolators/cgroups/mem.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp index e8160ef..3ba580d 100644 --- a/src/slave/containerizer/isolators/cgroups/mem.cpp +++ b/src/slave/containerizer/isolators/cgroups/mem.cpp @@ -61,8 +61,11 @@ static Future<Option<T> > none() { return None(); } CgroupsMemIsolatorProcess::CgroupsMemIsolatorProcess( const Flags& _flags, - const string& _hierarchy) - : flags(_flags), hierarchy(_hierarchy) {} + const string& _hierarchy, + const bool _limitSwap) + : flags(_flags), + hierarchy(_hierarchy), + limitSwap(_limitSwap) {} CgroupsMemIsolatorProcess::~CgroupsMemIsolatorProcess() {} @@ -88,8 +91,26 @@ Try<Isolator*> CgroupsMemIsolatorProcess::create(const Flags& flags) return Error(enable.error()); } + // Determine whether to limit swap or not. + bool limitSwap = false; + + if (flags.cgroups_limit_swap) { + Result<Bytes> check = cgroups::memory::memsw_limit_in_bytes( + hierarchy.get(), flags.cgroups_root); + + if (check.isError()) { + return Error( + "Failed to read 'memory.memsw.limit_in_bytes': " + + check.error()); + } else if (check.isNone()) { + return Error("'memory.memsw.limit_in_bytes' is not available"); + } + + limitSwap = true; + } + process::Owned<IsolatorProcess> process( - new CgroupsMemIsolatorProcess(flags, hierarchy.get())); + new CgroupsMemIsolatorProcess(flags, hierarchy.get(), limitSwap)); return new Isolator(process); } @@ -124,10 +145,10 @@ Future<Nothing> CgroupsMemIsolatorProcess::recover( if (!exists.get()) { VLOG(1) << "Couldn't find cgroup for container " << containerId; - // This may occur if the executor has exited and the isolator has - // destroyed the cgroup but the slave dies before noticing this. This - // will be detected when the containerizer tries to monitor the - // executor's pid. + // This may occur if the executor has exited and the isolator + // has destroyed the cgroup but the slave dies before noticing + // this. This will be detected when the containerizer tries to + // monitor the executor's pid. continue; } @@ -176,8 +197,8 @@ Future<Option<CommandInfo> > CgroupsMemIsolatorProcess::prepare( // TODO(bmahler): Don't insert into 'infos' unless we create the // cgroup successfully. It's safe for now because 'cleanup' gets - // called if we return a Failure, but cleanup will fail because - // the cgroup does not exist when cgroups::destroy is called. + // called if we return a Failure, but cleanup will fail because the + // cgroup does not exist when cgroups::destroy is called. Info* info = new Info( containerId, path::join(flags.cgroups_root, containerId.value())); @@ -265,42 +286,78 @@ Future<Nothing> CgroupsMemIsolatorProcess::update( cgroups::memory::soft_limit_in_bytes(hierarchy, info->cgroup, limit); if (write.isError()) { - return Failure("Failed to set 'memory.soft_limit_in_bytes': " - + write.error()); + return Failure( + "Failed to set 'memory.soft_limit_in_bytes': " + write.error()); } LOG(INFO) << "Updated 'memory.soft_limit_in_bytes' to " << limit << " for container " << containerId; // Read the existing limit. - Try<Bytes> currentLimit = - cgroups::memory::limit_in_bytes(hierarchy, info->cgroup); + Bytes currentLimit; + + if (limitSwap) { + Result<Bytes> _currentLimit = + cgroups::memory::memsw_limit_in_bytes(hierarchy, info->cgroup); + + if (_currentLimit.isError()) { + return Failure( + "Failed to read 'memory.memsw.limit_in_bytes': " + + _currentLimit.error()); + } else if (_currentLimit.isNone()) { + return Failure("'memory.memsw.limit_in_bytes' is not available"); + } - if (currentLimit.isError()) { - return Failure( - "Failed to read 'memory.limit_in_bytes': " + currentLimit.error()); + currentLimit = _currentLimit.get(); + } else { + Try<Bytes> _currentLimit = + cgroups::memory::limit_in_bytes(hierarchy, info->cgroup); + + if (_currentLimit.isError()) { + return Failure( + "Failed to read 'memory.limit_in_bytes': " + + _currentLimit.error()); + } + + currentLimit = _currentLimit.get(); } // Determine whether to set the hard limit. If this is the first // time (info->pid.isNone()), or we're raising the existing limit, // then we can update the hard limit safely. Otherwise, if we need // to decrease 'memory.limit_in_bytes' we may induce an OOM if too - // much memory is in use. As a result, we only update the soft - // limit when the memory reservation is being reduced. This is - // probably okay if the machine has available resources. + // much memory is in use. As a result, we only update the soft limit + // when the memory reservation is being reduced. This is probably + // okay if the machine has available resources. // TODO(benh): Introduce a MemoryWatcherProcess which monitors the - // discrepancy between usage and soft limit and introduces a "manual oom" if - // necessary. - if (info->pid.isNone() || limit > currentLimit.get()) { - write = cgroups::memory::limit_in_bytes(hierarchy, info->cgroup, limit); - - if (write.isError()) { - return Failure("Failed to set 'memory.limit_in_bytes': " + - write.error()); - } + // discrepancy between usage and soft limit and introduces a "manual + // oom" if necessary. + if (info->pid.isNone() || limit > currentLimit) { + if (limitSwap) { + Try<bool> write = cgroups::memory::memsw_limit_in_bytes( + hierarchy, info->cgroup, limit); + + if (write.isError()) { + return Failure( + "Failed to set 'memory.memsw.limit_in_bytes': " + write.error()); + } else if (!write.get()) { + return Failure("'memory.memsw.limit_in_bytes' is not available"); + } + + LOG(INFO) << "Updated 'memory.memsw.limit_in_bytes' to " << limit + << " for container " << containerId; + } else { + Try<Nothing> write = cgroups::memory::limit_in_bytes( + hierarchy, info->cgroup, limit); - LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit - << " for container " << containerId; + if (write.isError()) { + return Failure( + "Failed to set 'memory.limit_in_bytes': " + write.error()); + } + + LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit + << " for container " << containerId; + } } return Nothing(); @@ -470,12 +527,28 @@ void CgroupsMemIsolatorProcess::oom(const ContainerID& containerId) message << "Memory limit exceeded: "; // Output the requested memory limit. - Try<Bytes> limit = cgroups::memory::limit_in_bytes(hierarchy, info->cgroup); - - if (limit.isError()) { - LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': " << limit.error(); + if (limitSwap) { + Result<Bytes> limit = + cgroups::memory::memsw_limit_in_bytes(hierarchy, info->cgroup); + + if (limit.isError()) { + LOG(ERROR) << "Failed to read 'memory.memsw.limit_in_bytes': " + << limit.error(); + } else if (limit.isNone()) { + LOG(ERROR) << "'memory.memsw.limit_in_bytes' is not available"; + } else { + message << "Requested: " << limit.get() << " "; + } } else { - message << "Requested: " << limit.get() << " "; + Try<Bytes> limit = + cgroups::memory::limit_in_bytes(hierarchy, info->cgroup); + + if (limit.isError()) { + LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': " + << limit.error(); + } else { + message << "Requested: " << limit.get() << " "; + } } // Output the maximum memory usage. http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/containerizer/isolators/cgroups/mem.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/isolators/cgroups/mem.hpp b/src/slave/containerizer/isolators/cgroups/mem.hpp index 6869ed4..c734dae 100644 --- a/src/slave/containerizer/isolators/cgroups/mem.hpp +++ b/src/slave/containerizer/isolators/cgroups/mem.hpp @@ -74,7 +74,10 @@ public: const ContainerID& containerId); private: - CgroupsMemIsolatorProcess(const Flags& flags, const std::string& hierarchy); + CgroupsMemIsolatorProcess( + const Flags& flags, + const std::string& hierarchy, + bool limitSwap); virtual process::Future<Nothing> _cleanup( const ContainerID& containerId, @@ -113,6 +116,8 @@ private: // The path to the cgroups subsystem hierarchy root. const std::string hierarchy; + const bool limitSwap; + // TODO(bmahler): Use Owned<Info>. hashmap<ContainerID, Info*> infos; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/flags.hpp ---------------------------------------------------------------------- diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp index 841de23..1e36c51 100644 --- a/src/slave/flags.hpp +++ b/src/slave/flags.hpp @@ -209,6 +209,13 @@ public: "via the CFS bandwidth limiting subfeature.\n", false); + // TODO(antonl): Set default to true in future releases. + add(&Flags::cgroups_limit_swap, + "cgroups_limit_swap", + "Cgroups feature flag to enable memory limits on both memory and\n" + "swap instead of just memory.\n", + false); + add(&Flags::slave_subsystems, "slave_subsystems", "List of comma-separated cgroup subsystems to run the slave binary\n" @@ -329,6 +336,7 @@ public: std::string cgroups_root; Option<std::string> cgroups_subsystems; bool cgroups_enable_cfs; + bool cgroups_limit_swap; Option<std::string> slave_subsystems; Option<std::string> perf_events; Duration perf_interval;
