This is an automated email from the ASF dual-hosted git repository. grag pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit be90edd31a1833c5ed706b39f3a5547ae8153dd2 Author: Greg Mann <g...@mesosphere.io> AuthorDate: Mon Apr 6 15:16:45 2020 -0700 Sent appropriate task status reason when task over memory request. Review: https://reviews.apache.org/r/72305/ --- src/common/protobuf_utils.cpp | 3 ++- .../mesos/isolators/cgroups/subsystems/memory.cpp | 24 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp index 723d85a..8d1d5c4 100644 --- a/src/common/protobuf_utils.cpp +++ b/src/common/protobuf_utils.cpp @@ -254,7 +254,8 @@ StatusUpdate createStatusUpdate( CHECK( reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION || reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK || - reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) + reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY || + reason.get() == TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED) << reason.get(); status->mutable_limitation()->mutable_resources()->CopyFrom( diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp index 15f87ba..60c7a89 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp @@ -699,11 +699,33 @@ void MemorySubsystemProcess::oomWaited( ? (double) usage->bytes() / Bytes::MEGABYTES : 0), "*").get(); + TaskStatus::Reason reason = TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY; + + // If the container has a hard limit set higher than the soft limit, then + // check if the memory usage is above the soft limit but less than the hard + // limit. If so, we send a task status reason to the scheduler which indicates + // that this container was preferentially OOM-killed because it exceeded its + // memory request without hitting its memory limit. + Try<Bytes> softLimit = + cgroups::memory::soft_limit_in_bytes(hierarchy, cgroup); + + if (softLimit.isError()) { + LOG(ERROR) << "Failed to read 'memory.soft_limit_in_bytes': " + << softLimit.error(); + } else if (softLimit.get() < limit.get()) { + if (!usage.isError() && + !limit.isError() && + usage.get() > softLimit.get() && + usage.get() < limit.get()) { + reason = TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED; + } + } + infos[containerId]->limitation.set( protobuf::slave::createContainerLimitation( mem, message.str(), - TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)); + reason)); }