This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new a360806d7 Introduces flag to configure XFS quota headroom.
a360806d7 is described below
commit a360806d749ba7853a6bf3bdb445611ebd3af721
Author: Devin Leamy <[email protected]>
AuthorDate: Tue Jan 16 17:32:06 2024 -0500
Introduces flag to configure XFS quota headroom.
- `xfs_quota_headroom`: The size of hard XFS quota headroom.
---
.../containerizer/mesos/isolators/xfs/disk.cpp | 28 +++++--
.../containerizer/mesos/isolators/xfs/disk.hpp | 4 +-
src/slave/flags.cpp | 5 ++
src/slave/flags.hpp | 1 +
src/tests/containerizer/xfs_quota_tests.cpp | 88 ++++++++++++++++++++++
5 files changed, 117 insertions(+), 9 deletions(-)
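This makes configurable the 10MB of slack that the isolator previously hard-coded on top of each sandbox's XFS quota. When active enforcement (kill on violation) is in effect, the headroom is added to the per-sandbox soft limit to form the hard limit that XFS itself enforces, so a task can briefly overshoot its disk request before the isolator's periodic check terminates it. A minimal sketch of that arithmetic using stout's Bytes type (the variable names are illustrative, not the isolator's):

    #include <stout/bytes.hpp>

    // Illustrative only: mirrors the hard limit computation in
    // applyProjectQuota() below under active enforcement.
    Bytes softLimit = Megabytes(1);          // the task's disk request
    Bytes headroom  = Megabytes(10);         // --xfs_quota_headroom default
    Bytes hardLimit = softLimit + headroom;  // what XFS actually enforces

On an agent the flag would sit alongside the existing disk/xfs flags, e.g. --xfs_quota_headroom=20MB (assuming the usual byte-size flag syntax).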
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
index 79b0c3a6e..abd7dc98e 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -220,7 +220,8 @@ Try<Isolator*> XfsDiskIsolatorProcess::create(const Flags& flags)
quotaPolicy,
flags.work_dir,
totalProjectIds.get(),
- flags.disk_watch_interval)));
+ flags.disk_watch_interval,
+ flags.xfs_quota_headroom)));
}
@@ -229,14 +230,16 @@ XfsDiskIsolatorProcess::XfsDiskIsolatorProcess(
xfs::QuotaPolicy _quotaPolicy,
const std::string& _workDir,
const IntervalSet<prid_t>& projectIds,
- Duration _projectWatchInterval)
+ Duration _projectWatchInterval,
+ const Bytes _quotaHeadroom)
: ProcessBase(process::ID::generate("xfs-disk-isolator")),
watchInterval(_watchInterval),
projectWatchInterval(_projectWatchInterval),
quotaPolicy(_quotaPolicy),
workDir(_workDir),
totalProjectIds(projectIds),
- freeProjectIds(projectIds)
+ freeProjectIds(projectIds),
+ quotaHeadroom(_quotaHeadroom)
{
// At the beginning, the free project range is the same as the
// configured project range.
@@ -491,7 +494,7 @@ Future<Nothing> XfsDiskIsolatorProcess::recover(
if (projectId.isNone()) {
return Failure(
"Failed to assign project to sandbox : Failed to obtain"
- + " next project ID: range exhausted");
+ " next project ID: range exhausted");
}
infos.put(containerId, Owned<Info>(new Info(sandbox, projectId.get())));
@@ -595,6 +598,7 @@ static Try<xfs::QuotaInfo> applyProjectQuota(
const string& path,
prid_t projectId,
Bytes limit,
+ Bytes headroom,
xfs::QuotaPolicy quotaPolicy)
{
switch (quotaPolicy) {
@@ -618,7 +622,7 @@ static Try<xfs::QuotaInfo> applyProjectQuota(
// has been reached without allowing the process to allocate too
// much beyond the desired limit.
if (quotaPolicy == xfs::QuotaPolicy::ENFORCING_ACTIVE) {
- hardLimit += Megabytes(10);
+ hardLimit += headroom;
}
Try<Nothing> status = xfs::setProjectQuota(
@@ -658,7 +662,11 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
pathInfo.quota = sandboxQuota.get();
Try<xfs::QuotaInfo> status = applyProjectQuota(
- directory, pathInfo.projectId, sandboxQuota.get(), quotaPolicy);
+ directory,
+ pathInfo.projectId,
+ sandboxQuota.get(),
+ quotaHeadroom,
+ quotaPolicy);
if (status.isError()) {
return Failure(status.error());
}
@@ -720,8 +728,12 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
<< directory << "'";
}
- Try<xfs::QuotaInfo> status =
- applyProjectQuota(directory, projectId.get(), size, quotaPolicy);
+ Try<xfs::QuotaInfo> status = applyProjectQuota(
+ directory,
+ projectId.get(),
+ size,
+ quotaHeadroom,
+ quotaPolicy);
if (status.isError()) {
return Failure(status.error());
}
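The headroom matters because XFS enforces the hard limit in the kernel: once a project reaches it, further allocations fail outright, so without some slack above the soft limit a task would see write errors before the isolator's periodic check could observe the violation and report a disk limitation. A rough illustration of what a task would observe at the hard limit (assumed error handling, not Mesos code):

    #include <cerrno>
    #include <unistd.h>

    // Hypothetical helper: returns true if a write failed because the
    // XFS project hard limit was reached (the kernel reports EDQUOT,
    // "Disk quota exceeded").
    bool hitHardLimit(int fd, const void* buf, size_t size)
    {
      return ::write(fd, buf, size) == -1 && errno == EDQUOT;
    }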
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
index 55af66b48..052f160e1 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
@@ -106,7 +106,8 @@ private:
xfs::QuotaPolicy quotaPolicy,
const std::string& workDir,
const IntervalSet<prid_t>& projectIds,
- Duration projectWatchInterval);
+ Duration projectWatchInterval,
+ const Bytes quotaHeadroom);
// Responsible for validating a container hasn't broken the soft limit.
void check();
@@ -131,6 +132,7 @@ private:
const IntervalSet<prid_t> totalProjectIds;
IntervalSet<prid_t> freeProjectIds;
hashmap<ContainerID, process::Owned<Info>> infos;
+ const Bytes quotaHeadroom;
// Track the device and filesystem path of unused project IDs we want
// to reclaim.
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 940d5ee08..90cf32e71 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -1581,6 +1581,11 @@ mesos::internal::slave::Flags::Flags()
"Whether the `disk/xfs` isolator should detect and terminate\n"
"containers that exceed their allocated disk quota.",
false);
+
+ add(&Flags::xfs_quota_headroom,
+ "xfs_quota_headroom",
+ "The size of hard XFS quota headroom.",
+ Megabytes(10));
#endif
#if ENABLE_SECCOMP_ISOLATOR
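The flag is a stout Bytes value, so the Megabytes(10) default can be overridden with any of the usual byte-size spellings. A small sketch of the equivalences (assuming the flag loader uses the standard Bytes parser):

    #include <stout/bytes.hpp>
    #include <stout/try.hpp>

    // All three denote the same 20MB headroom.
    Try<Bytes> fromMB = Bytes::parse("20MB");
    Try<Bytes> fromKB = Bytes::parse("20480KB");
    Bytes inCode = Megabytes(20);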
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 2133cc3d6..12c2404f9 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -209,6 +209,7 @@ public:
#if ENABLE_XFS_DISK_ISOLATOR
std::string xfs_project_range;
bool xfs_kill_containers;
+ Bytes xfs_quota_headroom;
#endif
#if ENABLE_SECCOMP_ISOLATOR
Option<std::string> seccomp_config_dir;
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
index b3b42df8c..6625d3f9f 100644
--- a/src/tests/containerizer/xfs_quota_tests.cpp
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -915,6 +915,94 @@ TEST_F(ROOT_XFS_QuotaTest, VolumeUsageExceedsQuotaWithKill)
}
+// Verify that a task that tries to consume more disk space than it has requested
+// is able to do so, but eventually will be killed by the containerizer. For
+// this test we set up a custom quota headroom.
+TEST_F(ROOT_XFS_QuotaTest, DiskUsageExceedsQuotaWithSoftKillViolation)
+{
+ Try<Owned<cluster::Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.xfs_kill_containers = true;
+ flags.container_disk_watch_interval = Seconds(2);
+ flags.xfs_quota_headroom = Megabytes(20);
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+ ASSERT_SOME(slave);
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+ EXPECT_CALL(sched, registered(&driver, _, _));
+
+ Future<vector<Offer>> offers;
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillOnce(FutureArg<1>(&offers))
+ .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+ driver.start();
+
+ AWAIT_READY(offers);
+ ASSERT_FALSE(offers->empty());
+
+ const Offer& offer = offers.get()[0];
+
+ const string containerReadyFile =
+ path::join(flags.work_dir, "container_ready");
+
+ // Create a task which requests 1MB disk, but actually uses more
+ // than 12MB disk.
+ TaskInfo task = createTask(
+ offer.slave_id(),
+ Resources::parse("cpus:1;mem:128;disk:1").get(),
+ "dd if=/dev/zero of=file bs=1048576 count=12 && touch " +
+ containerReadyFile + " && sleep 1000");
+
+ Future<TaskStatus> startingStatus;
+ Future<TaskStatus> runningStatus;
+ Future<TaskStatus> killedStatus;
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&startingStatus))
+ .WillOnce(FutureArg<1>(&runningStatus))
+ .WillOnce(FutureArg<1>(&killedStatus));
+
+ // Prevent the isolator from signaling the limitation.
+ Clock::pause();
+ Clock::settle();
+
+ driver.launchTasks(offer.id(), {task});
+
+ AWAIT_READY(startingStatus);
+ EXPECT_EQ(task.task_id(), startingStatus->task_id());
+ EXPECT_EQ(TASK_STARTING, startingStatus->state());
+
+ AWAIT_READY(runningStatus);
+ EXPECT_EQ(task.task_id(), runningStatus->task_id());
+ EXPECT_EQ(TASK_RUNNING, runningStatus->state());
+
+ // Wait for the synchronization file to be created.
+ ASSERT_TRUE(waitForFileCreation(containerReadyFile));
+
+ Clock::advance(flags.container_disk_watch_interval);
+ Clock::resume();
+
+ AWAIT_READY(killedStatus);
+ EXPECT_EQ(task.task_id(), killedStatus->task_id());
+ EXPECT_EQ(TASK_FAILED, killedStatus->state());
+
+ EXPECT_EQ(TaskStatus::SOURCE_SLAVE, killedStatus->source());
+ EXPECT_EQ(
+ TaskStatus::REASON_CONTAINER_LIMITATION_DISK, killedStatus->reason());
+
+ driver.stop();
+ driver.join();
+}
+
+
// This is the same logic as DiskUsageExceedsQuota except we turn off disk quota
// enforcement, so exceeding the quota should be allowed.
TEST_P(ROOT_XFS_QuotaEnforcement, DiskUsageExceedsQuotaNoEnforce)
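For reference, the numbers behind the new DiskUsageExceedsQuotaWithSoftKillViolation test above (a sketch of the assumed arithmetic, not code from the test):

    #include <stout/bytes.hpp>

    // soft limit = 1MB  (the task's disk request)
    // headroom   = 20MB (flags.xfs_quota_headroom in the test)
    // hard limit = 21MB (soft + headroom, since xfs_kill_containers is set)
    Bytes hardLimit = Megabytes(1) + Megabytes(20);

    // The 12MB dd fits under the 21MB hard limit and succeeds, but it
    // violates the 1MB soft limit, so the container is killed on the next
    // container_disk_watch_interval tick.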