This is an automated email from the ASF dual-hosted git repository.

grag pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit be90edd31a1833c5ed706b39f3a5547ae8153dd2
Author: Greg Mann <g...@mesosphere.io>
AuthorDate: Mon Apr 6 15:16:45 2020 -0700

    Sent appropriate task status reason when task over memory request.
    
    Review: https://reviews.apache.org/r/72305/
---
 src/common/protobuf_utils.cpp                      |  3 ++-
 .../mesos/isolators/cgroups/subsystems/memory.cpp  | 24 +++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 723d85a..8d1d5c4 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -254,7 +254,8 @@ StatusUpdate createStatusUpdate(
     CHECK(
         reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION ||
         reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK ||
-        reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)
+        reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY ||
+        reason.get() == TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED)
       << reason.get();
 
     status->mutable_limitation()->mutable_resources()->CopyFrom(
diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
index 15f87ba..60c7a89 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
@@ -699,11 +699,33 @@ void MemorySubsystemProcess::oomWaited(
         ? (double) usage->bytes() / Bytes::MEGABYTES : 0),
       "*").get();
 
+  TaskStatus::Reason reason = TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY;
+
+  // If the container has a hard limit set higher than the soft limit, then
+  // check if the memory usage is above the soft limit but less than the hard
+  // limit. If so, we send a task status reason to the scheduler which indicates
+  // that this container was preferentially OOM-killed because it exceeded its
+  // memory request without hitting its memory limit.
+  Try<Bytes> softLimit =
+    cgroups::memory::soft_limit_in_bytes(hierarchy, cgroup);
+
+  if (softLimit.isError()) {
+    LOG(ERROR) << "Failed to read 'memory.soft_limit_in_bytes': "
+               << softLimit.error();
+  } else if (softLimit.get() < limit.get()) {
+    if (!usage.isError() &&
+        !limit.isError() &&
+        usage.get() > softLimit.get() &&
+        usage.get() < limit.get()) {
+      reason = TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED;
+    }
+  }
+
   infos[containerId]->limitation.set(
       protobuf::slave::createContainerLimitation(
           mem,
           message.str(),
-          TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY));
+          reason));
 }
 
 

Reply via email to