This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new a360806d7 Introduces flag to configure XFS quota headroom.
a360806d7 is described below

commit a360806d749ba7853a6bf3bdb445611ebd3af721
Author: Devin Leamy <[email protected]>
AuthorDate: Tue Jan 16 17:32:06 2024 -0500

    Introduces flag to configure XFS quota headroom.
    
    - `xfs_quota_headroom`: The size of hard XFS quota headroom.
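    
    As a rough usage sketch (the 20MB value and the isolator flags shown are
    illustrative, not part of this change), an operator could override the
    default 10MB headroom when starting the agent with the `disk/xfs`
    isolator:
    
        mesos-agent --isolation=disk/xfs \
                    --xfs_kill_containers \
                    --xfs_quota_headroom=20MB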
---
 .../containerizer/mesos/isolators/xfs/disk.cpp     | 28 +++++--
 .../containerizer/mesos/isolators/xfs/disk.hpp     |  4 +-
 src/slave/flags.cpp                                |  5 ++
 src/slave/flags.hpp                                |  1 +
 src/tests/containerizer/xfs_quota_tests.cpp        | 88 ++++++++++++++++++++++
 5 files changed, 117 insertions(+), 9 deletions(-)

diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
index 79b0c3a6e..abd7dc98e 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -220,7 +220,8 @@ Try<Isolator*> XfsDiskIsolatorProcess::create(const Flags& flags)
           quotaPolicy,
           flags.work_dir,
           totalProjectIds.get(),
-          flags.disk_watch_interval)));
+          flags.disk_watch_interval,
+          flags.xfs_quota_headroom)));
 }
 
 
@@ -229,14 +230,16 @@ XfsDiskIsolatorProcess::XfsDiskIsolatorProcess(
     xfs::QuotaPolicy _quotaPolicy,
     const std::string& _workDir,
     const IntervalSet<prid_t>& projectIds,
-    Duration _projectWatchInterval)
+    Duration _projectWatchInterval,
+    const Bytes _quotaHeadroom)
   : ProcessBase(process::ID::generate("xfs-disk-isolator")),
     watchInterval(_watchInterval),
     projectWatchInterval(_projectWatchInterval),
     quotaPolicy(_quotaPolicy),
     workDir(_workDir),
     totalProjectIds(projectIds),
-    freeProjectIds(projectIds)
+    freeProjectIds(projectIds),
+    quotaHeadroom(_quotaHeadroom)
 {
   // At the beginning, the free project range is the same as the
   // configured project range.
@@ -491,7 +494,7 @@ Future<Nothing> XfsDiskIsolatorProcess::recover(
     if (projectId.isNone()) {
       return Failure(
         "Failed to assign project to sandbox : Failed to obtain"
-        + " next project ID: range exhausted");
+        " next project ID: range exhausted");
     }
 
     infos.put(containerId, Owned<Info>(new Info(sandbox, projectId.get())));
@@ -595,6 +598,7 @@ static Try<xfs::QuotaInfo> applyProjectQuota(
     const string& path,
     prid_t projectId,
     Bytes limit,
+    Bytes headroom,
     xfs::QuotaPolicy quotaPolicy)
 {
   switch (quotaPolicy) {
@@ -618,7 +622,7 @@ static Try<xfs::QuotaInfo> applyProjectQuota(
       // has been reached without allowing the process to allocate too
       // much beyond the desired limit.
       if (quotaPolicy == xfs::QuotaPolicy::ENFORCING_ACTIVE) {
-        hardLimit += Megabytes(10);
+        hardLimit += headroom;
       }
 
       Try<Nothing> status = xfs::setProjectQuota(
@@ -658,7 +662,11 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
         pathInfo.quota = sandboxQuota.get();
 
         Try<xfs::QuotaInfo> status = applyProjectQuota(
-            directory, pathInfo.projectId, sandboxQuota.get(), quotaPolicy);
+            directory,
+            pathInfo.projectId,
+            sandboxQuota.get(),
+            quotaHeadroom,
+            quotaPolicy);
         if (status.isError()) {
           return Failure(status.error());
         }
@@ -720,8 +728,12 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
                 << directory << "'";
     }
 
-    Try<xfs::QuotaInfo> status =
-      applyProjectQuota(directory, projectId.get(), size, quotaPolicy);
+    Try<xfs::QuotaInfo> status = applyProjectQuota(
+        directory,
+        projectId.get(),
+        size,
+        quotaHeadroom,
+        quotaPolicy);
     if (status.isError()) {
       return Failure(status.error());
     }
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
index 55af66b48..052f160e1 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
@@ -106,7 +106,8 @@ private:
       xfs::QuotaPolicy quotaPolicy,
       const std::string& workDir,
       const IntervalSet<prid_t>& projectIds,
-      Duration projectWatchInterval);
+      Duration projectWatchInterval,
+      const Bytes quotaHeadroom);
 
   // Responsible for validating a container hasn't broken the soft limit.
   void check();
@@ -131,6 +132,7 @@ private:
   const IntervalSet<prid_t> totalProjectIds;
   IntervalSet<prid_t> freeProjectIds;
   hashmap<ContainerID, process::Owned<Info>> infos;
+  const Bytes quotaHeadroom;
 
   // Track the device and filesystem path of unused project IDs we want
   // to reclaim.
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 940d5ee08..90cf32e71 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -1581,6 +1581,11 @@ mesos::internal::slave::Flags::Flags()
       "Whether the `disk/xfs` isolator should detect and terminate\n"
       "containers that exceed their allocated disk quota.",
       false);
+
+  add(&Flags::xfs_quota_headroom,
+      "xfs_quota_headroom",
+      "The size of hard XFS quota headroom.",
+      Megabytes(10));
 #endif
 
 #if ENABLE_SECCOMP_ISOLATOR
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 2133cc3d6..12c2404f9 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -209,6 +209,7 @@ public:
 #if ENABLE_XFS_DISK_ISOLATOR
   std::string xfs_project_range;
   bool xfs_kill_containers;
+  Bytes xfs_quota_headroom;
 #endif
 #if ENABLE_SECCOMP_ISOLATOR
   Option<std::string> seccomp_config_dir;
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
index b3b42df8c..6625d3f9f 100644
--- a/src/tests/containerizer/xfs_quota_tests.cpp
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -915,6 +915,94 @@ TEST_F(ROOT_XFS_QuotaTest, VolumeUsageExceedsQuotaWithKill)
 }
 
 
+// Verify that a task that tries to consume more disk space than it has requested
+// is able to do so, but eventually will be killed by the containerizer. For
+// this test we set up a custom quota headroom.
+TEST_F(ROOT_XFS_QuotaTest, DiskUsageExceedsQuotaWithSoftKillViolation)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.xfs_kill_containers = true;
+  flags.container_disk_watch_interval = Seconds(2);
+  flags.xfs_quota_headroom = Megabytes(20);
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->empty());
+
+  const Offer& offer = offers.get()[0];
+
+  const string containerReadyFile =
+    path::join(flags.work_dir, "container_ready");
+
+  // Create a task which requests 1MB disk, but actually uses more
+  // than 12MB disk.
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:1").get(),
+      "dd if=/dev/zero of=file bs=1048576 count=12 && touch " +
+      containerReadyFile + " && sleep 1000");
+
+  Future<TaskStatus> startingStatus;
+  Future<TaskStatus> runningStatus;
+  Future<TaskStatus> killedStatus;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&startingStatus))
+    .WillOnce(FutureArg<1>(&runningStatus))
+    .WillOnce(FutureArg<1>(&killedStatus));
+
+  // Prevent the isolator from signaling the limitation.
+  Clock::pause();
+  Clock::settle();
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(startingStatus);
+  EXPECT_EQ(task.task_id(), startingStatus->task_id());
+  EXPECT_EQ(TASK_STARTING, startingStatus->state());
+
+  AWAIT_READY(runningStatus);
+  EXPECT_EQ(task.task_id(), runningStatus->task_id());
+  EXPECT_EQ(TASK_RUNNING, runningStatus->state());
+
+  // Wait for the synchronization file to be created.
+  ASSERT_TRUE(waitForFileCreation(containerReadyFile));
+
+  Clock::advance(flags.container_disk_watch_interval);
+  Clock::resume();
+
+  AWAIT_READY(killedStatus);
+  EXPECT_EQ(task.task_id(), killedStatus->task_id());
+  EXPECT_EQ(TASK_FAILED, killedStatus->state());
+
+  EXPECT_EQ(TaskStatus::SOURCE_SLAVE, killedStatus->source());
+  EXPECT_EQ(
+      TaskStatus::REASON_CONTAINER_LIMITATION_DISK, killedStatus->reason());
+
+  driver.stop();
+  driver.join();
+}
+
+
 // This is the same logic as DiskUsageExceedsQuota except we turn off disk quota
 // enforcement, so exceeding the quota should be allowed.
 TEST_P(ROOT_XFS_QuotaEnforcement, DiskUsageExceedsQuotaNoEnforce)
