This is an automated email from the ASF dual-hosted git repository. bmahler pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 4a60f96a41c49134350ce43d2ae2619fc659b079 Author: Ilya Pronin <[email protected]> AuthorDate: Wed Jan 29 15:59:25 2020 -0800 Added support for 64 bit rates in the port_mapping isolator. Currently byte rates in the port_mapping isolator are represented by uint32_t as in libnl 3.3. Because of that the maximum byte rate supported by the isolator is ~34 Gbit/s (4294967295*8). This limits the isolator's capability to utilize higher-speed NIC, e.g. 100G. This patch changes byte rates to uint64_t, which allows us to support values up to ~18 Ebit/s (not UINT64_MAX because of the need to convert bytes to bits for tc in the isolation script). This change requires support from libnl. We have to bump the minimal required version to 3.5.0 to get HTB API with 64 bit rate/ceil. --- src/linux/routing/queueing/htb.cpp | 17 ++++++++---- src/linux/routing/queueing/htb.hpp | 8 +++--- .../mesos/isolators/network/port_mapping.cpp | 8 +++--- src/tests/containerizer/port_mapping_tests.cpp | 31 ++++++++++++++-------- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/linux/routing/queueing/htb.cpp b/src/linux/routing/queueing/htb.cpp index eaaad55b0..253441d8f 100644 --- a/src/linux/routing/queueing/htb.cpp +++ b/src/linux/routing/queueing/htb.cpp @@ -85,13 +85,13 @@ Try<Nothing> encode<htb::cls::Config>( const Netlink<struct rtnl_class>& cls, const htb::cls::Config& config) { - int error = rtnl_htb_set_rate(cls.get(), config.rate); + int error = rtnl_htb_set_rate64(cls.get(), config.rate); if (error != 0) { return Error(string(nl_geterror(error))); } if (config.ceil.isSome()) { - error = rtnl_htb_set_ceil(cls.get(), config.ceil.get()); + error = rtnl_htb_set_ceil64(cls.get(), config.ceil.get()); if (error != 0) { return Error(string(nl_geterror(error))); } @@ -127,8 +127,15 @@ Result<htb::cls::Config> decode<htb::cls::Config>( htb::cls::Config config; - uint32_t rate = rtnl_htb_get_rate(cls.get()); - uint32_t ceil = rtnl_htb_get_ceil(cls.get()); + // With 
32bit rates this function used to return 0 rate and ceil in + // case of an error. We keep the same behavior even though 64bit + // getters from libnl are capable of returning an error when class + // data cannot be recorded or an attribute is not present. + uint64_t rate = 0; + uint64_t ceil = 0; + rtnl_htb_get_rate64(cls.get(), &rate); + rtnl_htb_get_ceil64(cls.get(), &ceil); + // NOTE: The libnl documentation is incorrect/confusing. The // correct buffer for sending at the ceil rate is rbuffer, *not* // the cbuffer. @@ -138,7 +145,7 @@ Result<htb::cls::Config> decode<htb::cls::Config>( return htb::cls::Config( rate, - (ceil > 0) ? Option<uint32_t>(ceil) : None(), + (ceil > 0) ? Option<uint64_t>(ceil) : None(), (burst > 0) ? Option<uint32_t>(burst) : None()); } diff --git a/src/linux/routing/queueing/htb.hpp b/src/linux/routing/queueing/htb.hpp index 64f16841f..74b408aa0 100644 --- a/src/linux/routing/queueing/htb.hpp +++ b/src/linux/routing/queueing/htb.hpp @@ -81,8 +81,8 @@ namespace cls { struct Config { Config( - uint32_t _rate, - Option<uint32_t> _ceil = None(), + uint64_t _rate, + Option<uint64_t> _ceil = None(), Option<uint32_t> _burst = None()) : rate(_rate), ceil(_ceil), @@ -99,9 +99,9 @@ struct Config // Normal rate limit. The size of the normal rate bucket is not // exposed and will be computed by the kernel. - uint32_t rate; + uint64_t rate; // Burst limit. - Option<uint32_t> ceil; + Option<uint64_t> ceil; // Size of the burst bucket. Option<uint32_t> burst; }; diff --git a/src/slave/containerizer/mesos/isolators/network/port_mapping.cpp b/src/slave/containerizer/mesos/isolators/network/port_mapping.cpp index bb52f333e..603825d4e 100644 --- a/src/slave/containerizer/mesos/isolators/network/port_mapping.cpp +++ b/src/slave/containerizer/mesos/isolators/network/port_mapping.cpp @@ -617,8 +617,8 @@ static Result<htb::cls::Config> parseHTBConfig(const JSON::Object& object) } return htb::cls::Config( - rate->as<uint32_t>(), - ceil.isSome() ? 
Option<uint32_t>(ceil->as<uint32_t>()) + rate->as<uint64_t>(), + ceil.isSome() ? Option<uint64_t>(ceil->as<uint64_t>()) : None(), burst.isSome() ? Option<uint32_t>(burst->as<uint32_t>()) : None()); @@ -5093,7 +5093,7 @@ Option<htb::cls::Config> PortMappingIsolatorProcess::egressHTBConfig( rate = std::min(flags.maximum_egress_rate_limit.get(), rate); } - Option<uint32_t> ceil; + Option<uint64_t> ceil; Option<uint32_t> burst; if (flags.egress_ceil_limit.isSome() && @@ -5132,7 +5132,7 @@ Option<htb::cls::Config> PortMappingIsolatorProcess::ingressHTBConfig( rate = std::min(flags.maximum_ingress_rate_limit.get(), rate); } - Option<uint32_t> ceil; + Option<uint64_t> ceil; Option<uint32_t> burst; if (flags.ingress_ceil_limit.isSome() && diff --git a/src/tests/containerizer/port_mapping_tests.cpp b/src/tests/containerizer/port_mapping_tests.cpp index 9bc94aad6..12d4626ca 100644 --- a/src/tests/containerizer/port_mapping_tests.cpp +++ b/src/tests/containerizer/port_mapping_tests.cpp @@ -1866,29 +1866,30 @@ TEST_F(PortMappingIsolatorTest, ROOT_ScaleEgressWithCPULarge) "cpus:64"); flags.resources = strings::join(";", resources); - const Bytes linkSpeed = 3125000000; // 25 Gbit/s - const Bytes ratePerCpu = linkSpeed / 64; + const Bytes linkSpeed = 12500000000; // 100 Gbit/s flags.network_link_speed = linkSpeed; - const Bytes minRate = 225000000; // 1.8 Gbit/s + const Bytes minRate = 625000000; // 5 Gbit/s flags.minimum_egress_rate_limit = minRate; - const Bytes maxRate = 2187500000; // 17.5 Gbit/s + const Bytes maxRate = 11250000000; // 90 Gbit/s flags.maximum_egress_rate_limit = maxRate; + const Bytes ratePerCpu = linkSpeed / 64; + // CPU high enough to be in linear scaling region and to trigger uint32_t - // overflow of scaled rate represented as bit/s: 16 * 3125000000 / 64 = - // 781250000 B/s or 6250000000 bits/s. 
- Try<Resources> linearCpu = Resources::parse("cpus:16;mem:1024;disk:1024"); + // overflow of scaled rate represented as bit/s: 30 * 12500000000 / 64 = + // 5859375000 or 46875000000 bits/s. + Try<Resources> linearCpu = Resources::parse("cpus:30;mem:1024;disk:1024"); ASSERT_SOME(linearCpu); // CPU low enough for scaled network egress to be increased to the min limit: - // 4 * 3125000000 / 64 = 195312500 B/s - Try<Resources> lowCpu = Resources::parse("cpus:4;mem:1024;disk:1024"); + // 1 * 12500000000 / 64 = 195312500 B/s. + Try<Resources> lowCpu = Resources::parse("cpus:1;mem:1024;disk:1024"); ASSERT_SOME(lowCpu); // CPU high enough for scaled network egress to be reduced to the max limit: - // 60 * 3125000000 / 64 = 2929687500 B/s. + // 60 * 12500000000 / 64 = 11718750000 B/s. Try<Resources> highCpu = Resources::parse("cpus:60;mem:1024;disk:1024"); ASSERT_SOME(highCpu); @@ -1945,7 +1946,7 @@ TEST_F(PortMappingIsolatorTest, ROOT_ScaleEgressWithCPULarge) Result<htb::cls::Config> config = recoverHTBConfig(pid.get(), eth0, flags); ASSERT_SOME(config); - ASSERT_EQ(ratePerCpu * 16, config->rate); + ASSERT_EQ(ratePerCpu * floor(linearCpu->cpus().get()), config->rate); // Reduce CPU to get to hit the min limit. Future<Nothing> update = isolator.get()->update(containerId1, lowCpu.get()); @@ -1955,6 +1956,14 @@ TEST_F(PortMappingIsolatorTest, ROOT_ScaleEgressWithCPULarge) ASSERT_SOME(config); ASSERT_EQ(minRate, config->rate); + // Increase CPU back to the linear limit. + update = isolator.get()->update(containerId1, linearCpu.get()); + AWAIT_READY(update); + + config = recoverHTBConfig(pid.get(), eth0, flags); + ASSERT_SOME(config); + ASSERT_EQ(ratePerCpu * floor(linearCpu->cpus().get()), config->rate); + // Increase CPU to hit the max limit. update = isolator.get()->update(containerId1, highCpu.get()); AWAIT_READY(update);
