This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push: new 77226e5b0 Blkio isolator: handled missing CFQ statistics. 77226e5b0 is described below commit 77226e5b092a313dab24ac4a49754d1a6f0f710d Author: Charles-Francois Natali <cf.nat...@gmail.com> AuthorDate: Sat Aug 27 09:37:10 2022 +0100 Blkio isolator: handled missing CFQ statistics. The CFQ I/O scheduler was removed in Linux kernel 5.0, so we need to silently skip the CFQ statistics on kernels where they no longer exist (5.0 and later). Before: ``` [----------] 1 test from CgroupsIsolatorTest [ RUN ] [...] CgroupsIsolatorTest.ROOT_CGROUPS_BlkioUsage W0824 21:58:36.604790 308667 cgroups.cpp:932] Skipping resource statistic for container b2d67073-85fa-4a0b-84a3-790791047eac because: Failed to read from 'blkio.time': No such file or directory ../../src/tests/containerizer/cgroups_isolator_tests.cpp:2656: Failure Value of: usage->has_blkio_statistics() Actual: false Expected: true [ [...] FAILED ] CgroupsIsolatorTest.ROOT_CGROUPS_BlkioUsage (1888 ms) ``` --- .../mesos/isolators/cgroups/subsystems/blkio.cpp | 349 +++++++++++---------- 1 file changed, 181 insertions(+), 168 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp index 19afed989..6d0d7f7de 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp @@ -27,6 +27,7 @@ extern "C" { #include <stout/foreach.hpp> #include <stout/hashmap.hpp> +#include "common/kernel_version.hpp" #include "linux/cgroups.hpp" #include "slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.hpp" @@ -107,230 +108,241 @@ Future<ResourceStatistics> BlkioSubsystemProcess::usage( CgroupInfo::Blkio::CFQ::Statistics totalCfqRecursive; CgroupInfo::Blkio::Throttling::Statistics totalThrottling; - // Get CFQ statistics. 
- Try<vector<cgroups::blkio::Value>> time = cfq::time(hierarchy, cgroup); - if (time.isError()) { - return Failure(time.error()); + // Get CFQ statistics, if available - CFQ was removed from kernel 5.0, see + // https://github.com/torvalds/linux/commit/f382fb0bcef4c37dc049e9f6963e3baf204d815c + + Try<Version> version = mesos::kernelVersion(); + if (version.isError()) { + return Failure("Could not determine kernel version"); } - foreach (const cgroups::blkio::Value& value, time.get()) { - if (value.device.isNone()) { - totalCfq.set_time(value.value); - } else { - cfq[value.device.get()].set_time(value.value); + if (version.get() < Version(5, 0, 0)) { + Try<vector<cgroups::blkio::Value>> time = cfq::time(hierarchy, cgroup); + if (time.isError()) { + return Failure(time.error()); } - } - Try<vector<cgroups::blkio::Value>> sectors = cfq::sectors(hierarchy, cgroup); - if (sectors.isError()) { - return Failure(sectors.error()); - } + foreach (const cgroups::blkio::Value& value, time.get()) { + if (value.device.isNone()) { + totalCfq.set_time(value.value); + } else { + cfq[value.device.get()].set_time(value.value); + } + } - foreach (const cgroups::blkio::Value& value, sectors.get()) { - if (value.device.isNone()) { - totalCfq.set_sectors(value.value); - } else { - cfq[value.device.get()].set_sectors(value.value); + Try<vector<cgroups::blkio::Value>> sectors = + cfq::sectors(hierarchy, cgroup); + if (sectors.isError()) { + return Failure(sectors.error()); } - } - Try<vector<cgroups::blkio::Value>> io_service_bytes = - cfq::io_service_bytes(hierarchy, cgroup); + foreach (const cgroups::blkio::Value& value, sectors.get()) { + if (value.device.isNone()) { + totalCfq.set_sectors(value.value); + } else { + cfq[value.device.get()].set_sectors(value.value); + } + } - if (io_service_bytes.isError()) { - return Failure(io_service_bytes.error()); - } + Try<vector<cgroups::blkio::Value>> io_service_bytes = + cfq::io_service_bytes(hierarchy, cgroup); - foreach (const 
cgroups::blkio::Value& statValue, io_service_bytes.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfq[statValue.device.get()].add_io_service_bytes() - : totalCfq.add_io_service_bytes(); + if (io_service_bytes.isError()) { + return Failure(io_service_bytes.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_service_bytes.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfq[statValue.device.get()].add_io_service_bytes() + : totalCfq.add_io_service_bytes(); - Try<vector<cgroups::blkio::Value>> io_serviced = - cfq::io_serviced(hierarchy, cgroup); + setValue(statValue, value); + } - if (io_serviced.isError()) { - return Failure(io_serviced.error()); - } + Try<vector<cgroups::blkio::Value>> io_serviced = + cfq::io_serviced(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_serviced.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfq[statValue.device.get()].add_io_serviced() - : totalCfq.add_io_serviced(); + if (io_serviced.isError()) { + return Failure(io_serviced.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_serviced.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfq[statValue.device.get()].add_io_serviced() + : totalCfq.add_io_serviced(); - Try<vector<cgroups::blkio::Value>> io_service_time = - cfq::io_service_time(hierarchy, cgroup); + setValue(statValue, value); + } - if (io_service_time.isError()) { - return Failure(io_service_time.error()); - } + Try<vector<cgroups::blkio::Value>> io_service_time = + cfq::io_service_time(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_service_time.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? 
cfq[statValue.device.get()].add_io_service_time() - : totalCfq.add_io_service_time(); + if (io_service_time.isError()) { + return Failure(io_service_time.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_service_time.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfq[statValue.device.get()].add_io_service_time() + : totalCfq.add_io_service_time(); - Try<vector<cgroups::blkio::Value>> io_wait_time = - cfq::io_wait_time(hierarchy, cgroup); + setValue(statValue, value); + } - if (io_wait_time.isError()) { - return Failure(io_wait_time.error()); - } + Try<vector<cgroups::blkio::Value>> io_wait_time = + cfq::io_wait_time(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_wait_time.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfq[statValue.device.get()].add_io_wait_time() - : totalCfq.add_io_wait_time(); + if (io_wait_time.isError()) { + return Failure(io_wait_time.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_wait_time.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfq[statValue.device.get()].add_io_wait_time() + : totalCfq.add_io_wait_time(); - Try<vector<cgroups::blkio::Value>> io_merged = - cfq::io_merged(hierarchy, cgroup); + setValue(statValue, value); + } - if (io_merged.isError()) { - return Failure(io_merged.error()); - } + Try<vector<cgroups::blkio::Value>> io_merged = + cfq::io_merged(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_merged.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfq[statValue.device.get()].add_io_merged() - : totalCfq.add_io_merged(); + if (io_merged.isError()) { + return Failure(io_merged.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_merged.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? 
cfq[statValue.device.get()].add_io_merged() + : totalCfq.add_io_merged(); - Try<vector<cgroups::blkio::Value>> io_queued = - cfq::io_queued(hierarchy, cgroup); + setValue(statValue, value); + } - if (io_queued.isError()) { - return Failure(io_queued.error()); - } + Try<vector<cgroups::blkio::Value>> io_queued = + cfq::io_queued(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_queued.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfq[statValue.device.get()].add_io_queued() - : totalCfq.add_io_queued(); + if (io_queued.isError()) { + return Failure(io_queued.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_queued.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfq[statValue.device.get()].add_io_queued() + : totalCfq.add_io_queued(); - // Get CFQ recursive statistics (blkio.*_recursive). - time = cfq::time_recursive(hierarchy, cgroup); - if (time.isError()) { - return Failure(time.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& value, time.get()) { - if (value.device.isNone()) { - totalCfqRecursive.set_time(value.value); - } else { - cfqRecursive[value.device.get()].set_time(value.value); + // Get CFQ recursive statistics (blkio.*_recursive). 
+ time = cfq::time_recursive(hierarchy, cgroup); + if (time.isError()) { + return Failure(time.error()); } - } - sectors = cfq::sectors_recursive(hierarchy, cgroup); - if (sectors.isError()) { - return Failure(sectors.error()); - } + foreach (const cgroups::blkio::Value& value, time.get()) { + if (value.device.isNone()) { + totalCfqRecursive.set_time(value.value); + } else { + cfqRecursive[value.device.get()].set_time(value.value); + } + } - foreach (const cgroups::blkio::Value& value, sectors.get()) { - if (value.device.isNone()) { - totalCfqRecursive.set_sectors(value.value); - } else { - cfqRecursive[value.device.get()].set_sectors(value.value); + sectors = cfq::sectors_recursive(hierarchy, cgroup); + if (sectors.isError()) { + return Failure(sectors.error()); } - } - io_service_bytes = cfq::io_service_bytes_recursive(hierarchy, cgroup); - if (io_service_bytes.isError()) { - return Failure(io_service_bytes.error()); - } + foreach (const cgroups::blkio::Value& value, sectors.get()) { + if (value.device.isNone()) { + totalCfqRecursive.set_sectors(value.value); + } else { + cfqRecursive[value.device.get()].set_sectors(value.value); + } + } - foreach (const cgroups::blkio::Value& statValue, io_service_bytes.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_service_bytes() - : totalCfqRecursive.add_io_service_bytes(); + io_service_bytes = cfq::io_service_bytes_recursive(hierarchy, cgroup); + if (io_service_bytes.isError()) { + return Failure(io_service_bytes.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_service_bytes.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? 
cfqRecursive[statValue.device.get()].add_io_service_bytes() + : totalCfqRecursive.add_io_service_bytes(); - io_serviced = cfq::io_serviced_recursive(hierarchy, cgroup); - if (io_serviced.isError()) { - return Failure(io_serviced.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& statValue, io_serviced.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_serviced() - : totalCfqRecursive.add_io_serviced(); + io_serviced = cfq::io_serviced_recursive(hierarchy, cgroup); + if (io_serviced.isError()) { + return Failure(io_serviced.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_serviced.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfqRecursive[statValue.device.get()].add_io_serviced() + : totalCfqRecursive.add_io_serviced(); - io_service_time = cfq::io_service_time_recursive(hierarchy, cgroup); - if (io_service_time.isError()) { - return Failure(io_service_time.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& statValue, io_service_time.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_service_time() - : totalCfqRecursive.add_io_service_time(); + io_service_time = cfq::io_service_time_recursive(hierarchy, cgroup); + if (io_service_time.isError()) { + return Failure(io_service_time.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_service_time.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? 
cfqRecursive[statValue.device.get()].add_io_service_time() + : totalCfqRecursive.add_io_service_time(); - io_wait_time = cfq::io_wait_time_recursive(hierarchy, cgroup); - if (io_wait_time.isError()) { - return Failure(io_wait_time.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& statValue, io_wait_time.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_wait_time() - : totalCfqRecursive.add_io_wait_time(); + io_wait_time = cfq::io_wait_time_recursive(hierarchy, cgroup); + if (io_wait_time.isError()) { + return Failure(io_wait_time.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_wait_time.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfqRecursive[statValue.device.get()].add_io_wait_time() + : totalCfqRecursive.add_io_wait_time(); - io_merged = cfq::io_merged_recursive(hierarchy, cgroup); - if (io_merged.isError()) { - return Failure(io_merged.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& statValue, io_merged.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_merged() - : totalCfqRecursive.add_io_merged(); + io_merged = cfq::io_merged_recursive(hierarchy, cgroup); + if (io_merged.isError()) { + return Failure(io_merged.error()); + } - setValue(statValue, value); - } + foreach (const cgroups::blkio::Value& statValue, io_merged.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? 
cfqRecursive[statValue.device.get()].add_io_merged() + : totalCfqRecursive.add_io_merged(); - io_queued = cfq::io_queued_recursive(hierarchy, cgroup); - if (io_queued.isError()) { - return Failure(io_queued.error()); - } + setValue(statValue, value); + } - foreach (const cgroups::blkio::Value& statValue, io_queued.get()) { - CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? cfqRecursive[statValue.device.get()].add_io_queued() - : totalCfqRecursive.add_io_queued(); + io_queued = cfq::io_queued_recursive(hierarchy, cgroup); + if (io_queued.isError()) { + return Failure(io_queued.error()); + } - setValue(statValue, value); + foreach (const cgroups::blkio::Value& statValue, io_queued.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() + ? cfqRecursive[statValue.device.get()].add_io_queued() + : totalCfqRecursive.add_io_queued(); + + setValue(statValue, value); + } } // Get throttling statistics. - io_serviced = throttle::io_serviced(hierarchy, cgroup); + Try<vector<cgroups::blkio::Value>> io_serviced = + throttle::io_serviced(hierarchy, cgroup); if (io_serviced.isError()) { return Failure(io_serviced.error()); } @@ -343,7 +355,8 @@ Future<ResourceStatistics> BlkioSubsystemProcess::usage( setValue(statValue, value); } - io_service_bytes = throttle::io_service_bytes(hierarchy, cgroup); + Try<vector<cgroups::blkio::Value>> io_service_bytes = + throttle::io_service_bytes(hierarchy, cgroup); if (io_service_bytes.isError()) { return Failure(io_service_bytes.error()); }