This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new eb3b5a16d [cgroups2] Introduce the PerfEventControllerProcess.
eb3b5a16d is described below
commit eb3b5a16dc7d5d96775ac04bbfa0554a3c6e2051
Author: None <None>
AuthorDate: Tue May 21 00:35:40 2024 -0400
[cgroups2] Introduce the PerfEventControllerProcess.
Introduces the controller process for perf_event, which was also present
in cgroups v1. The controller is automatically enabled, and should not be
visible in the cgroup.controllers file of the root cgroup.
As a consequence, we will not be able to manually enable or disable this
controller via writing to the cgroup.subtree_control file.
References:
* perf_event section in https://docs.kernel.org/admin-guide/cgroup-v2.html
* slide 34 in
https://man7.org/conf/ndctechtown2021/cgroups-v2-part-1-intro-NDC-TechTown-2021-Kerrisk.pdf
Review: https://reviews.apache.org/r/74997/
---
src/CMakeLists.txt | 4 +-
src/Makefile.am | 5 +-
.../mesos/isolators/cgroups2/cgroups2.cpp | 4 +-
.../mesos/isolators/cgroups2/constants.hpp | 1 +
.../isolators/cgroups2/controllers/perf_event.cpp | 223 +++++++++++++++++++++
.../isolators/cgroups2/controllers/perf_event.hpp | 103 ++++++++++
6 files changed, 337 insertions(+), 3 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 963d4201a..49f620851 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -356,7 +356,9 @@ if (ENABLE_CGROUPS_v2)
slave/containerizer/mesos/isolators/cgroups2/controller.cpp
slave/containerizer/mesos/isolators/cgroups2/controllers/core.cpp
slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.cpp
- slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp)
+ slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
+ slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.cpp)
+
endif ()
diff --git a/src/Makefile.am b/src/Makefile.am
index 779b893fc..68a93674f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1505,7 +1505,10 @@ MESOS_LINUX_FILES += \
slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.cpp \
slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.hpp \
slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp \
- slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp
+ slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp \
+ slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.cpp \
+ slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp
+
endif
if ENABLE_SECCOMP_ISOLATOR
diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp
index 76c8df9b1..cd9c38d9d 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp
@@ -21,6 +21,7 @@
#include "slave/containerizer/mesos/isolators/cgroups2/controllers/core.hpp"
#include "slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.hpp"
#include "slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp"
+#include "slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp"
#include <set>
#include <string>
@@ -77,7 +78,8 @@ Try<Isolator*> Cgroups2IsolatorProcess::create(const Flags& flags)
hashmap<string, Try<Owned<ControllerProcess>>(*)(const Flags&)> creators = {
{"core", &CoreControllerProcess::create},
{"cpu", &CpuControllerProcess::create},
- {"mem", &MemoryControllerProcess::create}
+ {"mem", &MemoryControllerProcess::create},
+ {"perf_event", &PerfEventControllerProcess::create}
};
hashmap<string, Owned<Controller>> controllers;
diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp
index 1fb713837..bad79ad0c 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp
@@ -38,6 +38,7 @@ const Bytes CGROUPS2_MIN_MEMORY = Megabytes(32);
const std::string CGROUPS2_CONTROLLER_CORE_NAME = "core";
const std::string CGROUPS2_CONTROLLER_CPU_NAME = "cpu";
const std::string CGROUPS2_CONTROLLER_MEMORY_NAME = "memory";
+const std::string CGROUPS2_CONTROLLER_PERF_EVENT_NAME = "perf_event";
} // namespace slave {
} // namespace internal {
diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.cpp
new file mode 100644
index 000000000..404f8d23a
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.cpp
@@ -0,0 +1,223 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp"
+
+#include <process/defer.hpp>
+#include <process/delay.hpp>
+#include <process/id.hpp>
+#include <process/reap.hpp>
+#include <stout/duration.hpp>
+#include <stout/error.hpp>
+
+#include "linux/perf.hpp"
+#include "slave/containerizer/mesos/isolators/cgroups2/constants.hpp"
+
+using process::Clock;
+using process::Failure;
+using process::Future;
+using process::Owned;
+using process::PID;
+using process::Time;
+
+using std::set;
+using std::string;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Factory for the perf_event controller process.
+//
+// When `flags.perf_events` is unset, a controller with an empty event
+// set is returned without any further validation. Otherwise the perf
+// configuration is checked in this order before the controller is
+// built:
+//   1. perf must be supported on this host;
+//   2. `flags.perf_duration` must not exceed `flags.perf_interval`;
+//   3. the comma-separated `flags.perf_events` must all be valid.
+//
+// Returns an Error describing the first check that fails.
+Try<process::Owned<ControllerProcess>> PerfEventControllerProcess::create(
+    const Flags& flags)
+{
+  if (flags.perf_events.isNone()) {
+    // No events requested: hand back a controller with nothing to sample.
+    return Owned<ControllerProcess>(
+        new PerfEventControllerProcess(flags, set<string>()));
+  }
+
+  if (!perf::supported()) {
+    return Error("Perf is not supported");
+  }
+
+  if (flags.perf_duration > flags.perf_interval) {
+    return Error(
+        "Sampling perf for duration (" + stringify(flags.perf_duration) +
+        ") > interval (" + stringify(flags.perf_interval) +
+        ") is not supported.");
+  }
+
+  // Collapse the comma-separated flag value into a set of event names.
+  const auto tokens = strings::tokenize(flags.perf_events.get(), ",");
+  const set<string> events(tokens.begin(), tokens.end());
+
+  if (!perf::valid(events)) {
+    return Error("Invalid perf events: " + stringify(events));
+  }
+
+  LOG(INFO) << "perf_event controller will profile for "
+            << "'" << flags.perf_duration << "' "
+            << "every '" << flags.perf_interval << "' "
+            << "for events: " << stringify(events);
+
+  return Owned<ControllerProcess>(
+      new PerfEventControllerProcess(flags, events));
+}
+
+
+// Builds the controller process: the process ID is generated from the
+// "cgroups-v2-perf-event-controller" prefix, the flags are forwarded to
+// the ControllerProcess base, and the events to sample are copied.
+PerfEventControllerProcess::PerfEventControllerProcess(
+    const Flags& _flags,
+    const set<string>& _events)
+  : ProcessBase(process::ID::generate("cgroups-v2-perf-event-controller")),
+    ControllerProcess(_flags),
+    events(_events)
+{}
+
+
+// Returns the name of this controller, i.e. the value of
+// CGROUPS2_CONTROLLER_PERF_EVENT_NAME.
+string PerfEventControllerProcess::name() const
+{
+  return CGROUPS2_CONTROLLER_PERF_EVENT_NAME;
+}
+
+// Starts the sampling loop, but only when there is at least one perf
+// event to sample.
+void PerfEventControllerProcess::initialize()
+{
+  if (events.empty()) {
+    // Nothing to sample, so the loop is never started.
+    return;
+  }
+
+  sample();
+}
+
+
+// Starts tracking a recovered container.
+//
+// Records `cgroup` for `containerId` in `infos` with a fresh `Info`.
+// Fails if the container is already tracked by this controller.
+Future<Nothing> PerfEventControllerProcess::recover(
+    const ContainerID& containerId, const string& cgroup)
+{
+  if (infos.contains(containerId)) {
+    return Failure("The controller '" + name() + "' has already been recovered");
+  }
+
+  infos.put(containerId, Owned<Info>(new Info(cgroup)));
+
+  return Nothing();
+}
+
+
+// Starts tracking a newly launched container.
+//
+// Records `cgroup` for `containerId` in `infos` with a fresh `Info`.
+// Fails if the container is already tracked by this controller.
+// `containerConfig` is not consulted.
+Future<Nothing> PerfEventControllerProcess::prepare(
+    const ContainerID& containerId,
+    const string& cgroup,
+    const mesos::slave::ContainerConfig& containerConfig)
+{
+  if (infos.contains(containerId)) {
+    return Failure("The controller '" + name() + "' has already been prepared");
+  }
+
+  infos.put(containerId, Owned<Info>(new Info(cgroup)));
+
+  return Nothing();
+}
+
+
+// Reports the perf statistics currently stored for `containerId`.
+//
+// The result carries a copy of the container's stored `PerfStatistics`
+// in its `perf` field. Fails if the container is not tracked by this
+// controller. `cgroup` is not consulted.
+Future<ResourceStatistics> PerfEventControllerProcess::usage(
+    const ContainerID& containerId,
+    const string& cgroup)
+{
+  if (infos.contains(containerId)) {
+    ResourceStatistics result;
+    result.mutable_perf()->CopyFrom(infos[containerId]->statistics);
+
+    return result;
+  }
+
+  return Failure(
+      "Failed to get the usage of controller '" + name() +
+      "'"
+      ": Unknown container");
+}
+
+
+// Stops tracking `containerId`.
+//
+// A request for a container this controller does not know about is
+// logged at VLOG(1) and otherwise ignored; the call succeeds either
+// way. `cgroup` is not consulted.
+Future<Nothing> PerfEventControllerProcess::cleanup(
+    const ContainerID& containerId,
+    const string& cgroup)
+{
+  if (infos.contains(containerId)) {
+    infos.erase(containerId);
+  } else {
+    VLOG(1) << "Ignoring cleanup controller '" << name() << "' "
+            << "request for unknown container " << containerId;
+  }
+
+  return Nothing();
+}
+
+
+// Kicks off one perf sample covering the cgroup of every container
+// currently tracked in `infos`, and arranges for `_sample` to be
+// invoked with the outcome (whether it succeeded or not).
+void PerfEventControllerProcess::sample()
+{
+  // Gather the cgroups to profile. Since destroyal is asynchronous,
+  // 'perf stat' may fail if a cgroup is destroyed before perf runs.
+  set<string> targets;
+
+  foreachvalue (const Owned<Info>& info, infos) {
+    targets.insert(info->cgroup);
+  }
+
+  // The discard timeout adds twice the reaper interval on top of the
+  // sampling duration so that the perf process exit can be observed.
+  const Duration duration = flags.perf_duration;
+  const Duration timeout = duration + process::MAX_REAP_INTERVAL() * 2;
+
+  perf::sample(events, targets, duration)
+    .after(
+        timeout,
+        [duration, timeout](Future<hashmap<string, PerfStatistics>> future) {
+          LOG(ERROR) << "Perf sample of " << stringify(duration)
+                     << " failed to complete within " << stringify(timeout)
+                     << "; sampling will be halted";
+
+          // Ask for the sample to be abandoned and pass the same
+          // future on to the continuation below.
+          future.discard();
+
+          return future;
+        })
+    .onAny(defer(
+        PID<PerfEventControllerProcess>(this),
+        &PerfEventControllerProcess::_sample,
+        Clock::now() + flags.perf_interval,
+        lambda::_1));
+}
+
+
+// Continuation of `sample()`: stores the results of a finished sample
+// and schedules the next one.
+//
+// `next` is the time at which the following sample should start.
+// `statistics` maps a cgroup to the perf statistics collected for it.
+void PerfEventControllerProcess::_sample(
+    const Time& next,
+    const Future<hashmap<string, PerfStatistics>>& statistics)
+{
+  if (statistics.isReady()) {
+    // Keep the latest statistics for every tracked container that the
+    // sample covers. Containers added in the interim are picked up by
+    // the next sample.
+    foreachvalue (const Owned<Info>& info, infos) {
+      const Option<PerfStatistics> latest = statistics->get(info->cgroup);
+
+      if (latest.isSome()) {
+        info->statistics = latest.get();
+      }
+    }
+  } else {
+    // The failure may be transient or caused by the timeout, so
+    // sampling carries on. Because sampling happens on an interval,
+    // this is acceptable even for a non-transient failure.
+    LOG(ERROR) << "Failed to get the perf sample: "
+               << (statistics.isFailed() ? statistics.failure() : "timeout");
+  }
+
+  // Schedule the next sample.
+  delay(next - Clock::now(),
+        PID<PerfEventControllerProcess>(this),
+        &PerfEventControllerProcess::sample);
+}
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp
new file mode 100644
index 000000000..bfbbb48ac
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/perf_event.hpp
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __PERF_EVENT_HPP__
+#define __PERF_EVENT_HPP__
+
+#include <set>
+#include <string>
+
+#include <mesos/resources.hpp>
+#include <process/clock.hpp>
+#include <process/future.hpp>
+#include <process/owned.hpp>
+#include <process/time.hpp>
+#include <stout/hashmap.hpp>
+
+#include "slave/containerizer/mesos/isolators/cgroups2/constants.hpp"
+#include "slave/containerizer/mesos/isolators/cgroups2/controller.hpp"
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Controller process for the cgroups v2 `perf_event` controller.
+//
+// Tracks the cgroup of every prepared or recovered container and, when
+// a non-empty set of perf events was configured, repeatedly samples
+// those cgroups, keeping the most recent `PerfStatistics` per container
+// for `usage()` to report.
+class PerfEventControllerProcess : public ControllerProcess
+{
+public:
+  // Validates the perf related flags and creates the controller
+  // process. Returns an Error if the configuration is rejected.
+  static Try<process::Owned<ControllerProcess>> create(const Flags& flags);
+
+  ~PerfEventControllerProcess() override = default;
+
+  // Name of this controller (CGROUPS2_CONTROLLER_PERF_EVENT_NAME).
+  std::string name() const override;
+
+  // Starts tracking a newly launched container; fails if the container
+  // is already tracked.
+  process::Future<Nothing> prepare(
+      const ContainerID& containerId,
+      const std::string& cgroup,
+      const mesos::slave::ContainerConfig& containerConfig) override;
+
+  // Starts tracking a recovered container; fails if the container is
+  // already tracked.
+  process::Future<Nothing> recover(
+      const ContainerID& containerId, const std::string& cgroup) override;
+
+  // Returns the perf statistics currently stored for the container;
+  // fails if the container is not tracked.
+  process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId, const std::string& cgroup) override;
+
+  // Stops tracking the container. Unknown containers are ignored.
+  process::Future<Nothing> cleanup(
+      const ContainerID& containerId, const std::string& cgroup) override;
+
+protected:
+  // Starts the sampling loop when there are events to sample.
+  void initialize() override;
+
+private:
+  PerfEventControllerProcess(
+      const Flags& flags, const std::set<std::string>& _events);
+
+  // Per-container state: the container's cgroup and the latest perf
+  // statistics stored for it.
+  struct Info
+  {
+    // `explicit` rules out accidental conversions from a string.
+    explicit Info(const std::string& _cgroup) : cgroup(_cgroup)
+    {
+      // Ensure the initial statistics include the required fields.
+      // Note the duration is set to zero to indicate no sampling has
+      // taken place. This empty sample will be returned from usage()
+      // until the first true sample is obtained.
+      statistics.set_timestamp(process::Clock::now().secs());
+      statistics.set_duration(Seconds(0).secs());
+    }
+
+    const std::string cgroup;
+    PerfStatistics statistics;
+  };
+
+  // Starts one perf sample over the cgroups of all tracked containers.
+  void sample();
+
+  // Stores the outcome of a sample and schedules the next one, using
+  // `next` as the time the following sample should start.
+  void _sample(
+      const process::Time& next,
+      const process::Future<hashmap<std::string, PerfStatistics>>& statistics);
+
+  // Set of events to sample.
+  std::set<std::string> events;
+
+  // Per-container information (cgroup and latest statistics), keyed by
+  // container ID.
+  hashmap<ContainerID, process::Owned<Info>> infos;
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+
+#endif // __PERF_EVENT_HPP__
\ No newline at end of file