Repository: mesos
Updated Branches:
  refs/heads/master a5cc9b435 -> cfa168f51


Allowed cgroups mem isolator to limit swap by setting
memory.memsw.limit_in_bytes.

Review: https://reviews.apache.org/r/24316


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cfa168f5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cfa168f5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cfa168f5

Branch: refs/heads/master
Commit: cfa168f5136f05d01ebb5ffa3a42a118db14c43e
Parents: a5cc9b4
Author: Anton Lindström <[email protected]>
Authored: Sun Aug 10 16:26:46 2014 -0700
Committer: Jie Yu <[email protected]>
Committed: Sun Aug 10 16:51:11 2014 -0700

----------------------------------------------------------------------
 src/linux/cgroups.cpp                           |  71 ++++++++-
 src/linux/cgroups.hpp                           |  17 +++
 .../containerizer/isolators/cgroups/mem.cpp     | 143 ++++++++++++++-----
 .../containerizer/isolators/cgroups/mem.hpp     |   7 +-
 src/slave/flags.hpp                             |   8 ++
 5 files changed, 209 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 47be0ef..989e307 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -1963,7 +1963,76 @@ Try<Nothing> limit_in_bytes(
     const Bytes& limit)
 {
   return cgroups::write(
-      hierarchy, cgroup, "memory.limit_in_bytes", stringify(limit.bytes()));
+      hierarchy,
+      cgroup,
+      "memory.limit_in_bytes",
+      stringify(limit.bytes()));
+}
+
+
+Result<Bytes> memsw_limit_in_bytes(
+    const string& hierarchy,
+    const string& cgroup)
+{
+  Try<bool> exists = cgroups::exists(
+      hierarchy, cgroup, "memory.memsw.limit_in_bytes");
+
+  if (exists.isError()) {
+    return Error(
+        "Could not check for existence of 'memory.memsw.limit_in_bytes': " +
+        exists.error());
+  }
+
+  if (!exists.get()) {
+    return None();
+  }
+
+  Try<string> read = cgroups::read(
+      hierarchy, cgroup, "memory.memsw.limit_in_bytes");
+
+  if (read.isError()) {
+    return Error(read.error());
+  }
+
+  Try<Bytes> bytes = Bytes::parse(strings::trim(read.get()) + "B");
+
+  if (bytes.isError()) {
+    return Error(bytes.error());
+  }
+
+  return bytes.get();
+}
+
+
+Try<bool> memsw_limit_in_bytes(
+    const string& hierarchy,
+    const string& cgroup,
+    const Bytes& limit)
+{
+  Try<bool> exists = cgroups::exists(
+      hierarchy, cgroup, "memory.memsw.limit_in_bytes");
+
+  if (exists.isError()) {
+    return Error(
+        "Could not check for existence of 'memory.memsw.limit_in_bytes': " +
+        exists.error());
+  }
+
+  if (!exists.get()) {
+    return false;
+  }
+
+  Try<Nothing> write = cgroups::write(
+      hierarchy,
+      cgroup,
+      "memory.memsw.limit_in_bytes",
+      stringify(limit.bytes()));
+
+  if (write.isError()) {
+    return Error(write.error());
+  }
+
+  return true;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 26dcb3d..abf31df 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -455,6 +455,23 @@ Try<Nothing> limit_in_bytes(
     const Bytes& limit);
 
 
+// Returns the memory limit from memory.memsw.limit_in_bytes. Returns
+// none if memory.memsw.limit_in_bytes is not supported (e.g., when
+// swap is turned off).
+Result<Bytes> memsw_limit_in_bytes(
+    const std::string& hierarchy,
+    const std::string& cgroup);
+
+
+// Sets the memory limit using memory.memsw.limit_in_bytes. Returns
+// false if memory.memsw.limit_in_bytes is not supported (e.g., when
+// swap is turned off).
+Try<bool> memsw_limit_in_bytes(
+    const std::string& hierarchy,
+    const std::string& cgroup,
+    const Bytes& limit);
+
+
 // Returns the soft memory limit from memory.soft_limit_in_bytes.
 Try<Bytes> soft_limit_in_bytes(
     const std::string& hierarchy,

http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp 
b/src/slave/containerizer/isolators/cgroups/mem.cpp
index e8160ef..3ba580d 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.cpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -61,8 +61,11 @@ static Future<Option<T> > none() { return None(); }
 
 CgroupsMemIsolatorProcess::CgroupsMemIsolatorProcess(
     const Flags& _flags,
-    const string& _hierarchy)
-  : flags(_flags), hierarchy(_hierarchy) {}
+    const string& _hierarchy,
+    const bool _limitSwap)
+  : flags(_flags),
+    hierarchy(_hierarchy),
+    limitSwap(_limitSwap) {}
 
 
 CgroupsMemIsolatorProcess::~CgroupsMemIsolatorProcess() {}
@@ -88,8 +91,26 @@ Try<Isolator*> CgroupsMemIsolatorProcess::create(const 
Flags& flags)
     return Error(enable.error());
   }
 
+  // Determine whether to limit swap or not.
+  bool limitSwap = false;
+
+  if (flags.cgroups_limit_swap) {
+    Result<Bytes> check = cgroups::memory::memsw_limit_in_bytes(
+        hierarchy.get(), flags.cgroups_root);
+
+    if (check.isError()) {
+      return Error(
+          "Failed to read 'memory.memsw.limit_in_bytes': " +
+          check.error());
+    } else if (check.isNone()) {
+      return Error("'memory.memsw.limit_in_bytes' is not available");
+    }
+
+    limitSwap = true;
+  }
+
   process::Owned<IsolatorProcess> process(
-      new CgroupsMemIsolatorProcess(flags, hierarchy.get()));
+      new CgroupsMemIsolatorProcess(flags, hierarchy.get(), limitSwap));
 
   return new Isolator(process);
 }
@@ -124,10 +145,10 @@ Future<Nothing> CgroupsMemIsolatorProcess::recover(
 
     if (!exists.get()) {
       VLOG(1) << "Couldn't find cgroup for container " << containerId;
-      // This may occur if the executor has exited and the isolator has
-      // destroyed the cgroup but the slave dies before noticing this. This
-      // will be detected when the containerizer tries to monitor the
-      // executor's pid.
+      // This may occur if the executor has exited and the isolator
+      // has destroyed the cgroup but the slave dies before noticing
+      // this. This will be detected when the containerizer tries to
+      // monitor the executor's pid.
       continue;
     }
 
@@ -176,8 +197,8 @@ Future<Option<CommandInfo> > 
CgroupsMemIsolatorProcess::prepare(
 
   // TODO(bmahler): Don't insert into 'infos' unless we create the
   // cgroup successfully. It's safe for now because 'cleanup' gets
-  // called if we return a Failure, but cleanup will fail because
-  // the cgroup does not exist when cgroups::destroy is called.
+  // called if we return a Failure, but cleanup will fail because the
+  // cgroup does not exist when cgroups::destroy is called.
   Info* info = new Info(
       containerId, path::join(flags.cgroups_root, containerId.value()));
 
@@ -265,42 +286,78 @@ Future<Nothing> CgroupsMemIsolatorProcess::update(
     cgroups::memory::soft_limit_in_bytes(hierarchy, info->cgroup, limit);
 
   if (write.isError()) {
-    return Failure("Failed to set 'memory.soft_limit_in_bytes': "
-        + write.error());
+    return Failure(
+        "Failed to set 'memory.soft_limit_in_bytes': " + write.error());
   }
 
   LOG(INFO) << "Updated 'memory.soft_limit_in_bytes' to " << limit
             << " for container " << containerId;
 
   // Read the existing limit.
-  Try<Bytes> currentLimit =
-    cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
+  Bytes currentLimit;
+
+  if (limitSwap) {
+    Result<Bytes> _currentLimit =
+      cgroups::memory::memsw_limit_in_bytes(hierarchy, info->cgroup);
+
+    if (_currentLimit.isError()) {
+      return Failure(
+          "Failed to read 'memory.memsw.limit_in_bytes': " +
+          _currentLimit.error());
+    } else if (_currentLimit.isNone()) {
+      return Failure("'memory.memsw.limit_in_bytes' is not available");
+    }
 
-  if (currentLimit.isError()) {
-    return Failure(
-        "Failed to read 'memory.limit_in_bytes': " + currentLimit.error());
+    currentLimit = _currentLimit.get();
+  } else {
+    Try<Bytes> _currentLimit =
+      cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
+
+    if (_currentLimit.isError()) {
+      return Failure(
+          "Failed to read 'memory.limit_in_bytes': " +
+          _currentLimit.error());
+    }
+
+    currentLimit = _currentLimit.get();
   }
 
   // Determine whether to set the hard limit. If this is the first
   // time (info->pid.isNone()), or we're raising the existing limit,
   // then we can update the hard limit safely. Otherwise, if we need
   // to decrease 'memory.limit_in_bytes' we may induce an OOM if too
-  // much memory is in use. As a result, we only update the soft
-  // limit when the memory reservation is being reduced. This is
-  // probably okay if the machine has available resources.
+  // much memory is in use. As a result, we only update the soft limit
+  // when the memory reservation is being reduced. This is probably
+  // okay if the machine has available resources.
   // TODO(benh): Introduce a MemoryWatcherProcess which monitors the
-  // discrepancy between usage and soft limit and introduces a "manual oom" if
-  // necessary.
-  if (info->pid.isNone() || limit > currentLimit.get()) {
-    write = cgroups::memory::limit_in_bytes(hierarchy, info->cgroup, limit);
-
-    if (write.isError()) {
-      return Failure("Failed to set 'memory.limit_in_bytes': " +
-                     write.error());
-    }
+  // discrepancy between usage and soft limit and introduces a "manual
+  // oom" if necessary.
+  if (info->pid.isNone() || limit > currentLimit) {
+    if (limitSwap) {
+      Try<bool> write = cgroups::memory::memsw_limit_in_bytes(
+          hierarchy, info->cgroup, limit);
+
+      if (write.isError()) {
+        return Failure(
+            "Failed to set 'memory.memsw.limit_in_bytes': " + write.error());
+      } else if (!write.get()) {
+        return Failure("'memory.memsw.limit_in_bytes' is not available");
+      }
+
+      LOG(INFO) << "Updated 'memory.memsw.limit_in_bytes' to " << limit
+                << " for container " << containerId;
+    } else {
+      Try<Nothing> write = cgroups::memory::limit_in_bytes(
+          hierarchy, info->cgroup, limit);
 
-    LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit
-              << " for container " << containerId;
+      if (write.isError()) {
+        return Failure(
+            "Failed to set 'memory.limit_in_bytes': " + write.error());
+      }
+
+      LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit
+                << " for container " << containerId;
+    }
   }
 
   return Nothing();
@@ -470,12 +527,28 @@ void CgroupsMemIsolatorProcess::oom(const ContainerID& 
containerId)
   message << "Memory limit exceeded: ";
 
   // Output the requested memory limit.
-  Try<Bytes> limit = cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
-
-  if (limit.isError()) {
-    LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': " << limit.error();
+  if (limitSwap) {
+    Result<Bytes> limit =
+      cgroups::memory::memsw_limit_in_bytes(hierarchy, info->cgroup);
+
+    if (limit.isError()) {
+      LOG(ERROR) << "Failed to read 'memory.memsw.limit_in_bytes': "
+                 << limit.error();
+    } else if (limit.isNone()) {
+      LOG(ERROR) << "'memory.memsw.limit_in_bytes' is not available";
+    } else {
+      message << "Requested: " << limit.get() << " ";
+    }
   } else {
-    message << "Requested: " << limit.get() << " ";
+    Try<Bytes> limit =
+      cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
+
+    if (limit.isError()) {
+      LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': "
+                 << limit.error();
+    } else {
+      message << "Requested: " << limit.get() << " ";
+    }
   }
 
   // Output the maximum memory usage.

http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/containerizer/isolators/cgroups/mem.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.hpp 
b/src/slave/containerizer/isolators/cgroups/mem.hpp
index 6869ed4..c734dae 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.hpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.hpp
@@ -74,7 +74,10 @@ public:
       const ContainerID& containerId);
 
 private:
-  CgroupsMemIsolatorProcess(const Flags& flags, const std::string& hierarchy);
+  CgroupsMemIsolatorProcess(
+      const Flags& flags,
+      const std::string& hierarchy,
+      bool limitSwap);
 
   virtual process::Future<Nothing> _cleanup(
       const ContainerID& containerId,
@@ -113,6 +116,8 @@ private:
   // The path to the cgroups subsystem hierarchy root.
   const std::string hierarchy;
 
+  const bool limitSwap;
+
   // TODO(bmahler): Use Owned<Info>.
   hashmap<ContainerID, Info*> infos;
 };

http://git-wip-us.apache.org/repos/asf/mesos/blob/cfa168f5/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 841de23..1e36c51 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -209,6 +209,13 @@ public:
         "via the CFS bandwidth limiting subfeature.\n",
         false);
 
+    // TODO(antonl): Set default to true in future releases.
+    add(&Flags::cgroups_limit_swap,
+        "cgroups_limit_swap",
+        "Cgroups feature flag to enable memory limits on both memory and\n"
+        "swap instead of just memory.\n",
+        false);
+
     add(&Flags::slave_subsystems,
         "slave_subsystems",
         "List of comma-separated cgroup subsystems to run the slave binary\n"
@@ -329,6 +336,7 @@ public:
   std::string cgroups_root;
   Option<std::string> cgroups_subsystems;
   bool cgroups_enable_cfs;
+  bool cgroups_limit_swap;
   Option<std::string> slave_subsystems;
   Option<std::string> perf_events;
   Duration perf_interval;

Reply via email to