(mesos) branch master updated: Removed `-k` option when installing Ruby
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new b54ff0f1d Removed `-k` option when installing Ruby b54ff0f1d is described below commit b54ff0f1d181c03bbf68009d80499d9f184584b6 Author: Qian Zhang AuthorDate: Wed Mar 20 10:50:25 2024 +0800 Removed `-k` option when installing Ruby --- support/mesos-website/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/support/mesos-website/Dockerfile b/support/mesos-website/Dockerfile index 617883e0c..2ffed3cc9 100644 --- a/support/mesos-website/Dockerfile +++ b/support/mesos-website/Dockerfile @@ -20,7 +20,7 @@ RUN apt-get update && \ # Install ruby version manager to get a more updated ruby version RUN curl -sSL https://rvm.io/mpapis.asc | gpg --import - && \ curl -sSL https://rvm.io/pkuczynski.asc | gpg --import - && \ -curl -k -sSL https://get.rvm.io | bash -s stable --ruby=2.6.6 +curl -sSL https://get.rvm.io | bash -s stable --ruby=2.6.6 ENV PATH=/usr/local/rvm/rubies/ruby-2.6.6/bin:$PATH
[mesos] branch master updated: Expose setmntent error reason in MountTable::read.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 8856d6fba Expose setmntent error reason in MountTable::read. 8856d6fba is described below commit 8856d6fba11281df898fd65b0cafa1e20eb90fe8 Author: Benjamin Mahler AuthorDate: Fri Jan 13 16:00:06 2023 -0500 Expose setmntent error reason in MountTable::read. This function is documented to set errno when it fails, but we're not exposing it. --- src/linux/fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linux/fs.cpp b/src/linux/fs.cpp index 2e36d3fa7..27536aab3 100644 --- a/src/linux/fs.cpp +++ b/src/linux/fs.cpp @@ -424,7 +424,7 @@ Try MountTable::read(const string& path) FILE* file = ::setmntent(path.c_str(), "r"); if (file == nullptr) { -return Error("Failed to open '" + path + "'"); +return ErrnoError("Failed to open '" + path + "'"); } while (true) {
[mesos] branch master updated: Blkio isolator: handled missing CFQ statistics.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 77226e5b0 Blkio isolator: handled missing CFQ statistics. 77226e5b0 is described below commit 77226e5b092a313dab24ac4a49754d1a6f0f710d Author: Charles-Francois Natali AuthorDate: Sat Aug 27 09:37:10 2022 +0100 Blkio isolator: handled missing CFQ statistics. CFQ was removed from 5.0 kernel, so we need to silently ignore missing CFQ statistics. Before: ``` [--] 1 test from CgroupsIsolatorTest [ RUN ] [...] CgroupsIsolatorTest.ROOT_CGROUPS_BlkioUsage W0824 21:58:36.604790 308667 cgroups.cpp:932] Skipping resource statistic for container b2d67073-85fa-4a0b-84a3-790791047eac because: Failed to read from 'blkio.time': No such file or directory ../../src/tests/containerizer/cgroups_isolator_tests.cpp:2656: Failure Value of: usage->has_blkio_statistics() Actual: false Expected: true [ [...] FAILED ] CgroupsIsolatorTest.ROOT_CGROUPS_BlkioUsage (1888 ms) ``` --- .../mesos/isolators/cgroups/subsystems/blkio.cpp | 349 +++-- 1 file changed, 181 insertions(+), 168 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp index 19afed989..6d0d7f7de 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.cpp @@ -27,6 +27,7 @@ extern "C" { #include #include +#include "common/kernel_version.hpp" #include "linux/cgroups.hpp" #include "slave/containerizer/mesos/isolators/cgroups/subsystems/blkio.hpp" @@ -107,230 +108,241 @@ Future BlkioSubsystemProcess::usage( CgroupInfo::Blkio::CFQ::Statistics totalCfqRecursive; CgroupInfo::Blkio::Throttling::Statistics totalThrottling; - // Get CFQ statistics. 
- Try> time = cfq::time(hierarchy, cgroup); - if (time.isError()) { -return Failure(time.error()); + // Get CFQ statistics, if available - CFQ was removed from kernel 5.0, see + // https://github.com/torvalds/linux/commit/f382fb0bcef4c37dc049e9f6963e3baf204d815c + + Try version = mesos::kernelVersion(); + if (version.isError()) { +return Failure("Could not determine kernel version"); } - foreach (const cgroups::blkio::Value& value, time.get()) { -if (value.device.isNone()) { - totalCfq.set_time(value.value); -} else { - cfq[value.device.get()].set_time(value.value); + if (version.get() < Version(5, 0, 0)) { +Try> time = cfq::time(hierarchy, cgroup); +if (time.isError()) { + return Failure(time.error()); } - } - Try> sectors = cfq::sectors(hierarchy, cgroup); - if (sectors.isError()) { -return Failure(sectors.error()); - } +foreach (const cgroups::blkio::Value& value, time.get()) { + if (value.device.isNone()) { +totalCfq.set_time(value.value); + } else { +cfq[value.device.get()].set_time(value.value); + } +} - foreach (const cgroups::blkio::Value& value, sectors.get()) { -if (value.device.isNone()) { - totalCfq.set_sectors(value.value); -} else { - cfq[value.device.get()].set_sectors(value.value); +Try> sectors = + cfq::sectors(hierarchy, cgroup); +if (sectors.isError()) { + return Failure(sectors.error()); } - } - Try> io_service_bytes = -cfq::io_service_bytes(hierarchy, cgroup); +foreach (const cgroups::blkio::Value& value, sectors.get()) { + if (value.device.isNone()) { +totalCfq.set_sectors(value.value); + } else { +cfq[value.device.get()].set_sectors(value.value); + } +} - if (io_service_bytes.isError()) { -return Failure(io_service_bytes.error()); - } +Try> io_service_bytes = + cfq::io_service_bytes(hierarchy, cgroup); - foreach (const cgroups::blkio::Value& statValue, io_service_bytes.get()) { -CgroupInfo::Blkio::Value* value = statValue.device.isSome() - ? 
cfq[statValue.device.get()].add_io_service_bytes() - : totalCfq.add_io_service_bytes(); +if (io_service_bytes.isError()) { + return Failure(io_service_bytes.error()); +} -setValue(statValue, value); - } +foreach (const cgroups::blkio::Value& statValue, io_service_bytes.get()) { + CgroupInfo::Blkio::Value* value = statValue.device.isSome() +? cfq[statValue.device.get()].add_io_service_bytes() +: totalCfq.add_io_service_bytes(); - Try> io_serviced = -cfq::io_serviced(hierarchy, cgroup); + setValue(statValue, value); +} - if (io_serviced.isError()) { -return Failure(io_serviced.error()); -
[mesos] branch master updated: ADD: New frameworks and executor to the community list.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new a69bd750a ADD: New frameworks and executor to the community list. a69bd750a is described below commit a69bd750a0cfa82d7327ca42ac9a82df9255e12a Author: Andreas Peters AuthorDate: Sun Aug 7 14:53:50 2022 +0200 ADD: New frameworks and executor to the community list. --- docs/frameworks.md | 8 1 file changed, 8 insertions(+) diff --git a/docs/frameworks.md b/docs/frameworks.md index 368ecad9a..92d1f1c75 100644 --- a/docs/frameworks.md +++ b/docs/frameworks.md @@ -11,12 +11,15 @@ layout: documentation ## Long Running Services * [Aurora](http://aurora.apache.org) is a service scheduler that runs on top of Mesos, enabling you to run long-running services that take advantage of Mesos' scalability, fault-tolerance, and resource isolation. +* [Compose](https://github.com/AVENTER-UG/mesos-compose) is a scheduler (HTTP API) for long running Mesos tasks. The syntax is compatible to docker-compose. +* [M3s](https://github.com/AVENTER-UG/mesos-m3s) is a scheduler (HTTP API) to run multiple K3s (Kubernetes) cluster on top of Mesos. * [Marathon](https://github.com/mesosphere/marathon) is a private PaaS built on Mesos. It automatically handles hardware or software failures and ensures that an app is "always on". * [Singularity](https://github.com/HubSpot/Singularity) is a scheduler (HTTP API and web interface) for running Mesos tasks: long running processes, one-off tasks, and scheduled jobs. * [SSSP](https://github.com/mesosphere/sssp) is a simple web application that provides a white-label "Megaupload" for storing and sharing files in S3. ## Big Data Processing +* [Apache Airflow provider](https://github.com/AVENTER-UG/airflow-provider-mesos) is a scheduler to scale out Apache Airflow DAG's on Mesos. 
* [Cray Chapel](https://github.com/nqn/mesos-chapel) is a productive parallel programming language. The Chapel Mesos scheduler lets you run Chapel programs on Mesos. * [Dpark](https://github.com/douban/dpark) is a Python clone of Spark, a MapReduce-like framework written in Python, running on Mesos. * [Exelixi](https://github.com/mesosphere/exelixi) is a distributed framework for running genetic algorithms at scale. @@ -49,3 +52,8 @@ layout: documentation ## Machine Learning * [TFMesos](https://github.com/douban/tfmesos) is a lightweight framework to help running distributed [Tensorflow](https://www.tensorflow.org/) Machine Learning tasks on Apache Mesos with GPU support. + +## Load Balancing + +* [Traefik Mesos provider](https://github.com/AVENTER-UG/traefik-mesos) is a modern HTTP reverse proxy and load balancer with build-in TCP and UDP support. +
[mesos] branch master updated: Bump tzinfo from 1.2.5 to 1.2.10 in /site
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new b801837d9 Bump tzinfo from 1.2.5 to 1.2.10 in /site b801837d9 is described below commit b801837d9727dc8b74a2fe254ef0e7efb6676b50 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Fri Jul 22 01:10:47 2022 +0000 Bump tzinfo from 1.2.5 to 1.2.10 in /site Bumps [tzinfo](https://github.com/tzinfo/tzinfo) from 1.2.5 to 1.2.10. - [Release notes](https://github.com/tzinfo/tzinfo/releases) - [Changelog](https://github.com/tzinfo/tzinfo/blob/master/CHANGES.md) - [Commits](https://github.com/tzinfo/tzinfo/compare/v1.2.5...v1.2.10) --- updated-dependencies: - dependency-name: tzinfo dependency-type: indirect ... Signed-off-by: dependabot[bot] --- site/Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/Gemfile.lock b/site/Gemfile.lock index 595fb2f87..0daf94094 100644 --- a/site/Gemfile.lock +++ b/site/Gemfile.lock @@ -137,7 +137,7 @@ GEM thor (0.20.3) thread_safe (0.3.6) tilt (1.4.1) -tzinfo (1.2.5) +tzinfo (1.2.10) thread_safe (~> 0.1) tzinfo-data (1.2019.2) tzinfo (>= 1.0.0)
[mesos] branch master updated: ADD: matrix slack bridge.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new f316f3c97 ADD: matrix slack bridge. f316f3c97 is described below commit f316f3c9707203700ae86c7af1a349da2aa609d5 Author: Andreas Peters AuthorDate: Tue Jun 7 15:46:32 2022 +0200 ADD: matrix slack bridge. --- site/source/community.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/source/community.html.md b/site/source/community.html.md index 5a86ed366..f9f7b2e6d 100644 --- a/site/source/community.html.md +++ b/site/source/community.html.md @@ -47,7 +47,7 @@ layout: community_section IRC If you want to connect to our slack team through an IRC client please click https://my.slack.com/gateways";>here after signing up. -The slack channel is currently also mirrored at #mesos channel on irc.freenode.net. +The slack channel is currently also mirrored at #mesos channel on irc.freenode.net and at https://matrix.to/#/#mesos:matrix.aventer.biz";>matrix. If you are new to IRC, you can use a http://webchat.freenode.net/?channels=#mesos";>web-based client.
[mesos] branch master updated: Removes Twitter embeds from website community page and older blog posts.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new a96d15cfb Removes Twitter embeds from website community page and older blog posts. a96d15cfb is described below commit a96d15cfb9b9cadeda452eba3819a5f6615bc0a4 Author: Dave Lester AuthorDate: Sun Jun 5 20:00:12 2022 -0700 Removes Twitter embeds from website community page and older blog posts. --- .../2015-01-14-mesoscon-2015-earlybird-registration-now-open.md| 2 +- .../blog/2015-07-01-mesoscon-seattle-keynote-speakers-announced.md | 1 - site/source/community.html.md | 7 +++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/site/source/blog/2015-01-14-mesoscon-2015-earlybird-registration-now-open.md b/site/source/blog/2015-01-14-mesoscon-2015-earlybird-registration-now-open.md index 213825dce..a57157db1 100644 --- a/site/source/blog/2015-01-14-mesoscon-2015-earlybird-registration-now-open.md +++ b/site/source/blog/2015-01-14-mesoscon-2015-earlybird-registration-now-open.md @@ -11,7 +11,7 @@ tags: MesosCon, Announcement A limited number of early-bird tickets are now available for [#MesosCon 2015](http://mesoscon.org), an annual conference organized by the Apache Mesos community. MesosCon brings together users and developers to share and learn about the project and its growing ecosystem, and will take place in Seattle, WA. -.https://twitter.com/mesoscon";>@MesosCon early-bird registration is now open to the first 140 attendees! Join us! 
http://t.co/LcIclfttgq";>http://t.co/LcIclfttgq More info: http://t.co/Dnd5XWhoOH";>http://t.co/Dnd5XWhoOH— Apache Mesos (@ApacheMesos) https://twitter.com/ApacheMesos/status/555445970595368960";>January 14, 2015 <p>.<a href="<a rel="nofollow" href="https://twitter.com/mesoscon"">https://twitter.com/mesoscon"</a>;>@MesosCon</a> early-bird registration is now open to the first 140 attendees! Join us! <a href="<a rel="nofollow" href="http://t.co/LcIclfttgq"">http://t.co/LcIclfttgq"</a>;><a rel="nofollow" href="http://t.co/LcIclfttgq">http://t.co/LcIclfttgq</a></a> More info: <a href="<a rel="nofollow" href="http://t.co/Dnd5XWhoOH"">http://t.co/Dnd5XWhoOH"</a>;><a rel="nofollow" href="http://t.co/Dnd5XWhoOH">http://t.co/Dnd5XWhoOH</a></a></p>— Apache Mesos (@ApacheMesos) <a href="<a rel="nofollow" href="https://twitter.com/ApacheMesos/status/555445970595368960"">https://twitter.com/ApacheMesos/status/555445970595368960"</a>;>January 14, 2015</a></blockquote> [Early-bird registration is open today](<a rel="nofollow" href="http://events.linuxfoundation.org/events/mesoscon/attend/register">http://events.linuxfoundation.org/events/mesoscon/attend/register</a>) to the first 140 attendees / through February 14th, 2015. 
diff --git a/site/source/blog/2015-07-01-mesoscon-seattle-keynote-speakers-announced.md b/site/source/blog/2015-07-01-mesoscon-seattle-keynote-speakers-announced.md index 891159f0b..9c5fcf55e 100644 --- a/site/source/blog/2015-07-01-mesoscon-seattle-keynote-speakers-announced.md +++ b/site/source/blog/2015-07-01-mesoscon-seattle-keynote-speakers-announced.md @@ -12,7 +12,6 @@ tags: MesosCon The countdown until [#MesosCon](<a rel="nofollow" href="http://events.linuxfoundation.org/events/mesoscon">http://events.linuxfoundation.org/events/mesoscon</a>) has begun: <blockquote class="twitter-tweet" lang="en"><p lang="en" dir="ltr">Only 7 weeks until <a href="<a rel="nofollow" href="https://twitter.com/hashtag/MesosCon?src=hash"">https://twitter.com/hashtag/MesosCon?src=hash"</a>;>#MesosCon</a>! Register today and join us in Seattle <a href="<a rel="nofollow" href="http://t.co/bVJ9dL5CEO"">http://t.co/bVJ9dL5CEO"</a>;><a rel="nofollow" href="http://t.co/bVJ9dL5CEO">http://t.co/bVJ9dL5CEO</a></a></p>— #MesosCon (@mesoscon) <a href="<a rel="nofollow" href="https://twitter.com/mesoscon/status/616320613922275332"">https://twitter.com/mesoscon/status/616320613922275332"</a>;>July 1, 2015</a></blockquote> -<script async src="//platform.twitter.com/widgets.js" charset="utf-8"> MesosCon
[mesos] branch master updated: Bump pyinstaller from 3.4 to 3.6 in /src/python/cli_new
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 26b82d825 Bump pyinstaller from 3.4 to 3.6 in /src/python/cli_new 26b82d825 is described below commit 26b82d825fa6b6e6193be0a3de94d288d3c9cd87 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Mon May 23 02:28:11 2022 +0000 Bump pyinstaller from 3.4 to 3.6 in /src/python/cli_new Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 3.4 to 3.6. - [Release notes](https://github.com/pyinstaller/pyinstaller/releases) - [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES-3.rst) - [Commits](https://github.com/pyinstaller/pyinstaller/compare/v3.4...v3.6) --- updated-dependencies: - dependency-name: pyinstaller dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- src/python/cli_new/pip-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/cli_new/pip-requirements.txt b/src/python/cli_new/pip-requirements.txt index 4681e3f05..f1920d801 100644 --- a/src/python/cli_new/pip-requirements.txt +++ b/src/python/cli_new/pip-requirements.txt @@ -6,7 +6,7 @@ kazoo==2.5.0 lazy-object-proxy==1.2.2 parse==1.8.0 Pygments==2.7.4 -PyInstaller==3.4 +PyInstaller==3.6 six==1.10.0 tenacity==4.12.0 toml==0.9.2
[mesos] branch master updated (ca26807ca -> 9019e3abb)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git from ca26807ca Bump pygments from 2.1.3 to 2.7.4 in /src/python/cli_new add 9019e3abb Bump json from 2.2.0 to 2.6.2 in /site No new revisions were added by this update. Summary of changes: site/Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[mesos] branch master updated: Bump pygments from 2.1.3 to 2.7.4 in /src/python/cli_new
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new ca26807ca Bump pygments from 2.1.3 to 2.7.4 in /src/python/cli_new ca26807ca is described below commit ca26807ca9bc090986774caa377a825141605ae8 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Mon May 23 01:54:26 2022 +0000 Bump pygments from 2.1.3 to 2.7.4 in /src/python/cli_new Bumps [pygments](https://github.com/pygments/pygments) from 2.1.3 to 2.7.4. - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.1.3...2.7.4) --- updated-dependencies: - dependency-name: pygments dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- src/python/cli_new/pip-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/cli_new/pip-requirements.txt b/src/python/cli_new/pip-requirements.txt index f0642b9b0..4681e3f05 100644 --- a/src/python/cli_new/pip-requirements.txt +++ b/src/python/cli_new/pip-requirements.txt @@ -5,7 +5,7 @@ isort==4.2.5 kazoo==2.5.0 lazy-object-proxy==1.2.2 parse==1.8.0 -Pygments==2.1.3 +Pygments==2.7.4 PyInstaller==3.4 six==1.10.0 tenacity==4.12.0
[mesos] branch master updated: Bump nokogiri from 1.10.4 to 1.13.6 in /site
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 9c7ccc285 Bump nokogiri from 1.10.4 to 1.13.6 in /site 9c7ccc285 is described below commit 9c7ccc285acc4120f67442a573552341a28d0474 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Wed May 18 22:02:11 2022 +0000 Bump nokogiri from 1.10.4 to 1.13.6 in /site Bumps [nokogiri](https://github.com/sparklemotion/nokogiri) from 1.10.4 to 1.13.6. - [Release notes](https://github.com/sparklemotion/nokogiri/releases) - [Changelog](https://github.com/sparklemotion/nokogiri/blob/main/CHANGELOG.md) - [Commits](https://github.com/sparklemotion/nokogiri/compare/v1.10.4...v1.13.6) --- updated-dependencies: - dependency-name: nokogiri dependency-type: indirect ... Signed-off-by: dependabot[bot] --- site/Gemfile.lock | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/site/Gemfile.lock b/site/Gemfile.lock index 3cf5f472f..c62dcf389 100644 --- a/site/Gemfile.lock +++ b/site/Gemfile.lock @@ -94,17 +94,19 @@ GEM mime-types (3.2.2) mime-types-data (~> 3.2015) mime-types-data (3.2019.0331) -mini_portile2 (2.4.0) +mini_portile2 (2.8.0) minitest (5.11.3) multi_json (1.13.1) -nokogiri (1.10.4) - mini_portile2 (~> 2.4.0) +nokogiri (1.13.6) + mini_portile2 (~> 2.8.0) + racc (~> 1.4) padrino-helpers (0.12.9) i18n (~> 0.6, >= 0.6.7) padrino-support (= 0.12.9) tilt (>= 1.4.1, < 3) padrino-support (0.12.9) activesupport (>= 3.1) +racc (1.6.0) rack (1.6.12) rack-livereload (0.3.17) rack
[mesos] branch master updated: Fixed clang-tidy warnings due to capturing this in a deferred lambda.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new cd71826ab Fixed clang-tidy warnings due to capturing this in a deferred lambda. cd71826ab is described below commit cd71826ab244db1f73e78fafe8a42181758a41e8 Author: Charles-Francois Natali AuthorDate: Fri Apr 29 22:47:22 2022 +0100 Fixed clang-tidy warnings due to capturing this in a deferred lambda. Use `defer(self(), lambda)` instead to avoid the risk of use-after-free. See on the mesos-tidy CI job: ``` /tmp/SRC/src/csi/v0_volume_manager.cpp:1078:13: warning: callback capturing this should be dispatched/deferred to a specific PID [mesos-this-capture] .then([=](const Map& secrets) { ``` Together with the recent fixes merged, this fix should allow the mesos-tidy CI job to be green again: https://ci-builds.apache.org/job/Mesos/job/Mesos-Tidybot/ --- src/csi/v0_volume_manager.cpp | 4 ++-- src/csi/v1_volume_manager.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/csi/v0_volume_manager.cpp b/src/csi/v0_volume_manager.cpp index 3d5caa85f..170369184 100644 --- a/src/csi/v0_volume_manager.cpp +++ b/src/csi/v0_volume_manager.cpp @@ -1075,7 +1075,7 @@ Future VolumeManagerProcess::__publishVolume(const string& volumeId) if (!volumeState.node_stage_secrets().empty()) { rpcResult = resolveSecrets(volumeState.node_stage_secrets()) - .then([=](const Map& secrets) { + .then(process::defer(self(), [=](const Map& secrets) { NodeStageVolumeRequest request_(request); *request_.mutable_node_stage_secrets() = secrets; @@ -1083,7 +1083,7 @@ Future VolumeManagerProcess::__publishVolume(const string& volumeId) NODE_SERVICE, &Client::nodeStageVolume, std::move(request_)); - }); + })); } else { rpcResult = call(NODE_SERVICE, &Client::nodeStageVolume, std::move(request)); diff --git a/src/csi/v1_volume_manager.cpp 
b/src/csi/v1_volume_manager.cpp index c50fda2d9..85f3ea756 100644 --- a/src/csi/v1_volume_manager.cpp +++ b/src/csi/v1_volume_manager.cpp @@ -1114,7 +1114,7 @@ Future VolumeManagerProcess::__publishVolume(const string& volumeId) if (!volumeState.node_stage_secrets().empty()) { rpcResult = resolveSecrets(volumeState.node_stage_secrets()) - .then([=](const Map& secrets) { + .then(process::defer(self(), [=](const Map& secrets) { NodeStageVolumeRequest request_(request); *request_.mutable_secrets() = secrets; @@ -1122,7 +1122,7 @@ Future VolumeManagerProcess::__publishVolume(const string& volumeId) NODE_SERVICE, &Client::nodeStageVolume, std::move(request_)); - }); + })); } else { rpcResult = call(NODE_SERVICE, &Client::nodeStageVolume, std::move(request));
[mesos] branch master updated: Fixed a crash in Storage Local Resource ProviderProcess.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 286e9aae8 Fixed a crash in Storage Local Resource ProviderProcess. 286e9aae8 is described below commit 286e9aae82297cdba516eceaf2b91041c33a90f8 Author: Charles-Francois Natali AuthorDate: Mon Apr 18 20:11:30 2022 +0100 Fixed a crash in Storage Local Resource ProviderProcess. `StorageLocalResourceProviderProcess::connected` can crash on a check that the current state is `DISCONNECTED` if the current state is `READY`, which can happen if the periodic reconciliation runs after disconnection. It can be reproduced by running `ContentType/AgentResourceProviderConfigApiTest.Add/0` in a loop, preferably with some CPU-intensive workload in the background to affect the timing. Update the check to allow `READY` as well. --- src/resource_provider/storage/provider.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp index 1a202af95..b341f3e96 100644 --- a/src/resource_provider/storage/provider.cpp +++ b/src/resource_provider/storage/provider.cpp @@ -471,7 +471,8 @@ StorageLocalResourceProviderProcess::StorageLocalResourceProviderProcess( void StorageLocalResourceProviderProcess::connected() { - CHECK_EQ(DISCONNECTED, state); + CHECK(state == DISCONNECTED || state == READY) + << "Unexpected state: " << state; LOG(INFO) << "Connected to resource provider manager";
[mesos] branch master updated: CHANGE: remove bintray from docs.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 99ff1ae CHANGE: remove bintray from docs. 99ff1ae is described below commit 99ff1ae3b470a82267d7b5ccebd64ca0c77a85c6 Author: Andreas Peters AuthorDate: Fri Mar 25 09:03:41 2022 +0100 CHANGE: remove bintray from docs. --- docs/binary-packages.md | 25 +++-- docs/release-guide.md | 31 --- 2 files changed, 3 insertions(+), 53 deletions(-) diff --git a/docs/binary-packages.md b/docs/binary-packages.md index 07e0933..b189449 100644 --- a/docs/binary-packages.md +++ b/docs/binary-packages.md @@ -5,30 +5,11 @@ layout: documentation # Binary Packages -## Downloading the Mesos RPM +## Downloading the Mesos binaries -Download and install the latest stable RPM binary from the [Bintray Repository](https://bintray.com/apache/mesos/): +Download and install [the latest stable Mesos binaries](https://mesos.apache.org/downloads/). -$ cat > /tmp/bintray-mesos-el.repo <https://dl.bintray.com/apache/mesos/el7/x86_64 -gpgcheck=0 -repo_gpgcheck=0 -enabled=1 -EOF - -$ sudo mv /tmp/bintray-mesos-el.repo /etc/yum.repos.d/bintray-mesos-el.repo - -$ sudo yum update - -$ sudo yum install mesos - -The above instructions show how to install the latest version of Mesos for RHEL 7. -Substitute `baseurl` the with the appropriate URL for your operating system. - -## Start Mesos Master and Agent. +## Start Mesos Master and Agent The RPM installation creates the directory `/var/lib/mesos` that can be used as a work directory. diff --git a/docs/release-guide.md b/docs/release-guide.md index 49e1e74..4916bbb 100644 --- a/docs/release-guide.md +++ b/docs/release-guide.md @@ -309,37 +309,6 @@ Update the Mesos Homebrew package. 3. Once accepted, verify that `brew install mesos` works. -Upload the binary RPM packages to Bintray: - -1. 
If you haven't done so, sign up for bintray and request membership in the apache organization. - Get your API key by clicking on `Edit profile` and then `API Key` on Bintray - -2. Go to the [Bintray package](https://bintray.com/apache/mesos/mesos) and click the `Add a version` button. - Enter the version number as `Name`, set the appropriate release date and click `Create`. - -3. Go to [Apache Jenkins](https://builds.apache.org/job/Mesos/job/Packaging/job/CentosRPMs) and start a - manual run of the `CentosRPMs` job to generate official binary packages for this version. - -4. Upload the files generated in step 3 to Bintray. Note that you cannot use the web interface - for this step, since it has a file size limit of 250MiB as of the time of this writing. - - To upload, use a command like this, replacing file name and version number where necessary: - -curl\ - -u:\ - -H Content-Type:application/json \ - -H Accept:application/json\ - -T ./mesos-debuginfo-1.8.1-1.el7.x86_64.rpm \ - -H X-Bintray-Package:mesos\ - -H X-Bintray-Version:1.8.1\ - -H X-Bintray-Publish:0\ - https://api.bintray.com/content/apache/mesos/el7/x86_64/mesos-debuginfo-1.8.1-1.el7.x86_64.rpm - - (NOTE: The correct upload URL for the package is `apache/mesos/` whereas the web UI is at `apache/mesos/mesos/`.) - -5. Go back to the bintray web interface, verify that you uploaded the correct files to the correct location - and finally click on `Publish all`. - Update Wikipedia: 1. Update the [Wikipedia article](https://en.wikipedia.org/wiki/Apache_Mesos) to mention the
[mesos] branch master updated: ADD: unofficial repository.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 995c274 ADD: unofficial repository. 995c274 is described below commit 995c274bf5a3fb649622d85a771f3a22b4021650 Author: Andreas Peters AuthorDate: Wed Mar 23 08:21:20 2022 +0100 ADD: unofficial repository. --- site/source/downloads.html.erb | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/site/source/downloads.html.erb b/site/source/downloads.html.erb index 90a0149..3d64013 100644 --- a/site/source/downloads.html.erb +++ b/site/source/downloads.html.erb @@ -36,18 +36,30 @@ breadcrumb: Downloads - Once you have downloaded Mesos, follow the + +Once you have downloaded Mesos, follow the getting started instructions, and take a look at the rest of the Mesos documentation. Getting the code via source control - The codebase can also be downloaded from the Apache git repository at: + +The codebase can also be downloaded from the Apache git repository at: https://gitbox.apache.org/repos/asf/mesos.git. This repository is also mirrored on https://github.com/apache/mesos";>GitHub. + + Getting community driven Mesos binaries + +The following repositories are unofficial and not supportet by Apache. + + +Mesos binaries for CentOS (x86_64), AlmaLinux (x86_64, aarch64), Ubuntu (x86_64): https://rpm.aventer.biz";>https://rpm.aventer.biz + + +
[mesos] branch master updated: Added Capability.Type.QUOTA_V2 to v1 operator API.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 1a8a657 Added Capability.Type.QUOTA_V2 to v1 operator API. 1a8a657 is described below commit 1a8a65779c1d8edd38f011e2f7c948c97ebe2bc4 Author: Charles-Francois Natali AuthorDate: Thu Feb 17 09:00:10 2022 +0800 Added Capability.Type.QUOTA_V2 to v1 operator API. Closes #MESOS-10235. This closes #419 --- include/mesos/v1/mesos.proto | 4 1 file changed, 4 insertions(+) diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index f99cff3..80515c9 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -922,6 +922,10 @@ message MasterInfo { // The master can drain or deactivate agents when requested // via operator APIs. AGENT_DRAINING = 2; + + // The master can handle the new quota API, which supports setting + // limits separately from guarantees (introduced in Mesos 1.9). + QUOTA_V2 = 3; } optional Type type = 1; }
[mesos] branch master updated: Fixed NestedMesosContainerizerTest hangs on errors.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 52cfb8e Fixed NestedMesosContainerizerTest hangs on errors. 52cfb8e is described below commit 52cfb8ed0f3b48cadc90b7c15236c4c7e5489dd7 Author: Charles-Francois Natali AuthorDate: Mon Aug 9 22:19:09 2021 +0800 Fixed NestedMesosContainerizerTest hangs on errors. Those tests would use a named pipe to synchronize with the task being started. The problem is that if the task fails to start, reading from the pipe would block indefinitely, making the tests just hang. We could update the code to use a read with a timeout, however it's a bit fiddly and it's simpler to just use the presence as a regular file as a barrier. See https://issues.apache.org/jira/browse/MESOS-10226 for context. Tested by @martin-g This closes #402 --- .../nested_mesos_containerizer_tests.cpp | 59 -- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/tests/containerizer/nested_mesos_containerizer_tests.cpp b/src/tests/containerizer/nested_mesos_containerizer_tests.cpp index 8aaf80a..4731596 100644 --- a/src/tests/containerizer/nested_mesos_containerizer_tests.cpp +++ b/src/tests/containerizer/nested_mesos_containerizer_tests.cpp @@ -157,6 +157,23 @@ protected: return containerConfig; } + + static bool awaitSynchronizationFile(const string& path) + { +Duration waited = Duration::zero(); +Duration interval = Milliseconds(1); + +do { + if (os::exists(path)) { +return true; + } + + os::sleep(interval); + waited += interval; +} while (waited < process::TEST_AWAIT_TIMEOUT); + +return false; + } }; @@ -663,15 +680,14 @@ TEST_F(NestedMesosContainerizerTest, ContainerID containerId; containerId.set_value(id::UUID::random().toString()); - // Use a pipe to synchronize with the top-level container. 
- string pipe = path::join(sandbox.get(), "pipe"); - ASSERT_EQ(0, ::mkfifo(pipe.c_str(), 0700)); + // Use a file to synchronize with the top-level container. + string syncFile = path::join(sandbox.get(), "syncFile"); const string filename = "nested_inherits_work_dir"; ExecutorInfo executor = createExecutorInfo( "executor", - "touch " + filename + "; echo running > " + pipe + "; sleep 1000", + "touch " + filename + "; touch " + syncFile + "; sleep 1000", "cpus:1"); Try directory = environment->mkdtemp(); @@ -695,9 +711,7 @@ TEST_F(NestedMesosContainerizerTest, // Wait for the parent container to start running its task // before launching a debug container inside it. - Result read = os::read(pipe); - ASSERT_SOME(read); - ASSERT_EQ("running\n", read.get()); + ASSERT_TRUE(awaitSynchronizationFile(syncFile)); Future status = containerizer->status(containerId); AWAIT_READY(status); @@ -1093,15 +1107,14 @@ TEST_F(NestedMesosContainerizerTest, AWAIT_READY(offers); ASSERT_EQ(1u, offers->size()); - // Use a pipe to synchronize with the top-level container. - string pipe = path::join(sandbox.get(), "pipe"); - ASSERT_EQ(0, ::mkfifo(pipe.c_str(), 0700)); + // Use a file to synchronize with the top-level container. + string syncFile = path::join(sandbox.get(), "syncFile"); // Launch a command task within the `alpine` docker image. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), - "echo running > /tmp/pipe; sleep 1000"); + "touch /tmp/syncFile; sleep 1000"); task.mutable_container()->CopyFrom(createContainerInfo( "alpine", {createVolumeHostPath("/tmp", sandbox.get(), Volume::RW)})); @@ -1123,9 +1136,7 @@ TEST_F(NestedMesosContainerizerTest, // Wait for the parent container to start running its task // before launching a debug container inside it. 
- Result read = os::read(pipe); - ASSERT_SOME(read); - ASSERT_EQ("running\n", read.get()); + ASSERT_TRUE(awaitSynchronizationFile(syncFile)); ASSERT_TRUE(statusRunning->has_slave_id()); ASSERT_TRUE(statusRunning->has_container_status()); @@ -1207,14 +1218,13 @@ TEST_F(NestedMesosContainerizerTest, ContainerID containerId; containerId.set_value(id::UUID::random().toString()); - string pipe = path::join(sandbox.get(), "pipe"); - ASSERT_EQ(0, ::mkfifo(pipe.c_str(), 0700)); + string syncFile = path::join(sandbox.get(), "syncFile"); const string cmd = "(unshare -m sh -c" " 'mkdir -p test_mnt; mount tmpfs
[mesos] branch master updated: LinkedHashMap: fixed handling of self-assignment.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new f343811 LinkedHashMap: fixed handling of self-assignment. f343811 is described below commit f34381173dbb90a14ebdea58bc52ee8bd2bba64f Author: Charles-Francois Natali AuthorDate: Tue Jul 27 20:39:21 2021 +0800 LinkedHashMap: fixed handling of self-assignment. Self-assigning a LinkedHashMap i.e. `map = map` would cause the map to be cleared. Found with clang-tidy. This closes #400 --- 3rdparty/stout/include/stout/linkedhashmap.hpp | 12 +++- 3rdparty/stout/tests/linkedhashmap_tests.cpp | 7 +++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/3rdparty/stout/include/stout/linkedhashmap.hpp b/3rdparty/stout/include/stout/linkedhashmap.hpp index 6bf71ad..a4d86c6 100644 --- a/3rdparty/stout/include/stout/linkedhashmap.hpp +++ b/3rdparty/stout/include/stout/linkedhashmap.hpp @@ -46,13 +46,15 @@ public: LinkedHashMap& operator=(const LinkedHashMap& other) { -clear(); +if (this != &other) { + clear(); -entries_ = other.entries_; + entries_ = other.entries_; -// Build up the index. -for (auto it = entries_.begin(); it != entries_.end(); ++it) { - keys_[it->first] = it; + // Build up the index. + for (auto it = entries_.begin(); it != entries_.end(); ++it) { +keys_[it->first] = it; + } } return *this; diff --git a/3rdparty/stout/tests/linkedhashmap_tests.cpp b/3rdparty/stout/tests/linkedhashmap_tests.cpp index a48d97a..e9179de 100644 --- a/3rdparty/stout/tests/linkedhashmap_tests.cpp +++ b/3rdparty/stout/tests/linkedhashmap_tests.cpp @@ -236,4 +236,11 @@ TEST(LinkedHashMapTest, Assignment) EXPECT_NE(map.keys(), copy.keys()); EXPECT_NE(map.values(), copy.values()); + + // Test self-assignment. + copy = map; + map = map; + + EXPECT_EQ(copy.keys(), map.keys()); + EXPECT_EQ(copy.values(), map.values()); }
[mesos] branch master updated: Fixed a bug where the cgroup task killer leaves the cgroup frozen.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 8a344da Fixed a bug where the cgroup task killer leaves the cgroup frozen. 8a344da is described below commit 8a344da266bdded895f73d7ab189e868d326f1e5 Author: Charles-Francois Natali AuthorDate: Wed Jul 21 21:13:55 2021 +0800 Fixed a bug where the cgroup task killer leaves the cgroup frozen. This closes #388 --- src/linux/cgroups.cpp | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp index 43d8ac0..11626cf 100644 --- a/src/linux/cgroups.cpp +++ b/src/linux/cgroups.cpp @@ -1398,15 +1398,14 @@ public: // Return a future indicating the state of the killer. // Failure occurs if any process in the cgroup is unable to be // killed. + // Discarding the future will cause this process to stop the next time it + // calls `freeze`: we don't want to stop at an arbitrary point since it might + // leave the cgroup frozen. Future future() { return promise.future(); } protected: void initialize() override { -// Stop when no one cares. -promise.future().onDiscard(lambda::bind( -static_cast(terminate), self(), true)); - killTasks(); } @@ -1425,16 +1424,15 @@ private: const PID& pid) { // Cancel the freeze operation. -// TODO(jieyu): Wait until 'future' is in DISCARDED state before -// starting retry. future.discard(); -// We attempt to kill the processes before we thaw again, -// due to a bug in the kernel. See MESOS-1758 for more details. -// We thaw the cgroup before trying to freeze again to allow any -// pending signals to be delivered. See MESOS-1689 for details. -// This is a short term hack until we have PID namespace support. 
-return Future(true) +// Wait until the freeze is cancelled, and then attempt to kill the +// processes before we thaw again, due to a bug in the kernel. See +// MESOS-1758 for more details. We thaw the cgroup before trying to freeze +// again to allow any pending signals to be delivered. See MESOS-1689 for +// details. This is a short term hack until we have PID namespace support. +return future + .recover([](const Future&){ return Future(Nothing()); }) .then(defer(pid, &Self::kill)) .then(defer(pid, &Self::thaw)) .then(defer(pid, &Self::freeze)); @@ -1452,6 +1450,12 @@ private: Future freeze() { +// Don't start another `killTasks` cycle if we've been asked to stop. +if (promise.future().hasDiscard()) { +terminate(self()); +return Nothing(); +} + // TODO(jieyu): This is a workaround for MESOS-1689. We will move // away from freezer once we have pid namespace support. return cgroups::freezer::freeze(hierarchy, cgroup).after(
[mesos] branch master updated: Added Mesos authentication to the Mesos cli
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new c66aaf4 Added Mesos authentication to the Mesos cli c66aaf4 is described below commit c66aaf47768aa0e5b66ee9d830f634f4dd682c43 Author: Andreas Peters AuthorDate: Wed Jul 21 21:05:39 2021 +0800 Added Mesos authentication to the Mesos cli The following points I have done: - Add authentication against Mesos master and agent. - Add option to skip SSL verification of the mesos-agent. - Change the order of "task list" to get back more running states. This closes #383 --- src/python/cli_new/README.md | 10 +++ src/python/cli_new/lib/cli/config.py | 86 src/python/cli_new/lib/cli/http.py | 72 ++-- src/python/cli_new/lib/cli/mesos.py | 39 +++ src/python/cli_new/lib/cli/plugins/agent/main.py | 3 +- src/python/cli_new/lib/cli/plugins/task/main.py | 9 ++- src/python/cli_new/lib/cli/tests/agent.py| 2 +- src/python/cli_new/lib/cli/tests/base.py | 4 +- src/python/cli_new/lib/cli/tests/task.py | 10 +-- src/python/cli_new/pip-requirements.txt | 2 - src/python/cli_new/tox.ini | 3 +- 11 files changed, 176 insertions(+), 64 deletions(-) diff --git a/src/python/cli_new/README.md b/src/python/cli_new/README.md index 0e6c716..7ac22b2 100644 --- a/src/python/cli_new/README.md +++ b/src/python/cli_new/README.md @@ -104,6 +104,9 @@ plugins = [ # `address` or `zookeeper` field, but not both. For example: [master] address = "10.10.0.30:5050" + principal = "username" + secret = "password" + # The `zookeeper` field has an `addresses` array and a `path` field. 
# [master.zookeeper] # addresses = [ @@ -112,6 +115,13 @@ plugins = [ # "10.10.0.33:5050" # ] # path = "/mesos" + +[agent] + ssl = true + ssl_verify = false + principal = "username" + secret = "password" + timeout = 5 ``` You can override the location of this configuration file using diff --git a/src/python/cli_new/lib/cli/config.py b/src/python/cli_new/lib/cli/config.py index 7f41736..fa6c8ff 100644 --- a/src/python/cli_new/lib/cli/config.py +++ b/src/python/cli_new/lib/cli/config.py @@ -21,6 +21,7 @@ Config class to manage the configuration file. import os import toml +import requests import cli from cli.constants import DEFAULT_MASTER_IP @@ -119,6 +120,79 @@ class Config(): return master +def principal(self): +""" +Return the principal in the configuration file +""" +return self.data["master"].get("principal") + +def secret(self): +""" +Return the secret in the configuration file +""" +return self.data["master"].get("secret") + +def agent_ssl(self, default=False): +""" +Return if the agent support ssl +""" +if "agent" in self.data: +agent_ssl = self.data["agent"].get("ssl", default) +if not isinstance(agent_ssl, bool): +raise CLIException("The 'agent->ssl' field" + " must be True/False") + +return agent_ssl + +return default + +def agent_ssl_verify(self, default=False): +""" +Return if the ssl certificate should be verified +""" +if "agent" in self.data: +ssl_verify = self.data["agent"].get("ssl_verify", default) +if not isinstance(ssl_verify, bool): +raise CLIException("The 'agent->ssl_verify' field" + " must be True/False") + +return ssl_verify + +return default + +def agent_timeout(self, default=5): +""" +Return the connection timeout of the agent +""" +if "agent" in self.data: +timeout = self.data["agent"].get("timeout", default) +if not isinstance(timeout, int): +raise CLIException("The 'agent->timeout' field" + " must be a number in seconds") + +return timeout + +return default + + +def agent_principal(self): +""" +Return the principal in the configuration 
file +""
[mesos] branch master updated: Updated documentation for Systemd's parameter `delegate`
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new f244030 Updated documentation for Systemd's parameter `delegate` f244030 is described below commit f24403039395d408cc473f01955865388a59d3cb Author: Andreas Peters AuthorDate: Wed Jul 14 19:45:45 2021 +0800 Updated documentation for Systemd's parameter `delegate` This closes #398 --- docs/mesos-containerizer.md | 11 +++ support/packaging/common/mesos-slave.service | 1 + 2 files changed, 12 insertions(+) diff --git a/docs/mesos-containerizer.md b/docs/mesos-containerizer.md index c632550..9a7b25d 100644 --- a/docs/mesos-containerizer.md +++ b/docs/mesos-containerizer.md @@ -66,3 +66,14 @@ Mesos supports the following built-in isolators. - [volume/secret](secrets.md#file-based-secrets) - [windows/cpu](isolators/windows.md#cpu-limits) - [windows/mem](isolators/windows.md#memory-limits) + +## Systemd Integration + +To prevent systemd from manipulating cgroups managed by the agent, +it's recommended to add 'Delegate' under 'Service' in the service +unit file of Mesos agent, for example: + +``` +[Service] +Delegate=true +``` diff --git a/support/packaging/common/mesos-slave.service b/support/packaging/common/mesos-slave.service index 99c2728..a3d9320 100644 --- a/support/packaging/common/mesos-slave.service +++ b/support/packaging/common/mesos-slave.service @@ -11,6 +11,7 @@ RestartSec=20 LimitNOFILE=16384 CPUAccounting=true MemoryAccounting=true +Delegate=true [Install] WantedBy=multi-user.target
[mesos] branch master updated: Added asf.yaml configuration for the new website handling.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new ad02e47 Added asf.yaml configuration for the new website handling. ad02e47 is described below commit ad02e47ae55b70904c84e46db5688b9359785c2e Author: Andreas Peters AuthorDate: Mon Jul 5 20:53:53 2021 +0800 Added asf.yaml configuration for the new website handling. This patch will persist the .asf.yaml configuration for the new website publishing mechanism (https://github.com/apache/mesos-site/pull/2). This closes #399 --- site/config.rb | 6 ++ site/source/asf.yaml.erb | 2 ++ 2 files changed, 8 insertions(+) diff --git a/site/config.rb b/site/config.rb index 04bc7aa..01ea84a 100644 --- a/site/config.rb +++ b/site/config.rb @@ -59,3 +59,9 @@ page "/api/*", :directory_index => false activate :directory_indexes activate :syntax activate :livereload + +page "asf.yaml", :layout => false + +after_build do + File.rename 'publish/asf.yaml', 'publish/.asf.yaml' +end diff --git a/site/source/asf.yaml.erb b/site/source/asf.yaml.erb new file mode 100644 index 000..0735550 --- /dev/null +++ b/site/source/asf.yaml.erb @@ -0,0 +1,2 @@ +publish: + whoami: asf-site
[mesos] branch master updated: Added Saad Ur Rahman to contributors list.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 1b445c3 Added Saad Ur Rahman to contributors list. 1b445c3 is described below commit 1b445c392a25e6c556acbf2bd8ed5214250ddace Author: Saad Ur Rahman AuthorDate: Fri Jun 25 22:02:53 2021 +0800 Added Saad Ur Rahman to contributors list. This closes #396 --- docs/contributors.yaml | 6 ++ 1 file changed, 6 insertions(+) diff --git a/docs/contributors.yaml b/docs/contributors.yaml index da8930f..1c8e71d 100644 --- a/docs/contributors.yaml +++ b/docs/contributors.yaml @@ -776,6 +776,12 @@ jira_user: okuryu reviewboard_user: okuryu +- name: Saad Ur Rahman + emails: +- saad.ur.rah...@gmail.com + jira_user: surahman + reviewboard_user: saad.ur.rahman + - name: Sachin Paryani emails: - sachin.pary...@gmail.com
[mesos] branch master updated: Fixed crashes on ARM64 due to bad interaction of libunwind with libgcc.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new e6dd2de Fixed crashes on ARM64 due to bad interaction of libunwind with libgcc. e6dd2de is described below commit e6dd2de5af3f982f336a2464c6e3b5ff8c34a33d Author: Charles-Francois Natali AuthorDate: Fri Jun 25 21:15:58 2021 +0800 Fixed crashes on ARM64 due to bad interaction of libunwind with libgcc. Closes MESOS-10223. @qianzhangxa This closes #395 --- configure.ac | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b346208..b2e0381 100644 --- a/configure.ac +++ b/configure.ac @@ -857,11 +857,19 @@ AS_IF([test "x$enable_parallel_test_execution" = "xyes"], AS_IF([test "x$enable_werror" != "xno"], [AC_SUBST([WERROR], [-Werror])]) -# Check for libunwind, and link it in if present. +# Check for libunwind - used by glog for tracebacks - and link it in if +# present. +# `-lgcc_s` is necessary on e.g. ARM64 and IA-64, see +# https://bugzilla.redhat.com/show_bug.cgi?id=480412 +# Notes: +# - Omitting it causes glog to link it anyway, resulting in the same problem. +# - If `-lgcc_s` isn't available, e.g. if compiling with a pure LLVM toolchain, +# then we won't link it here but glog will link it anyway, but it won't suffer +# from the problem which affects only libgcc. if test "x$OS_NAME" = "xfreebsd"; then AC_CHECK_LIB(execinfo, backtrace, LIBS="$LIBS -lexecinfo") else - AC_CHECK_LIB(unwind, backtrace, LIBS="$LIBS -lunwind") + AC_CHECK_LIB(unwind, backtrace, LIBS="$LIBS -lgcc_s -lunwind") fi
[mesos] branch master updated: Fixed `ldcache.parse` to handle excess tail data in Ubuntu
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 3777b3a Fixed `ldcache.parse` to handle excess tail data in Ubuntu 3777b3a is described below commit 3777b3a0ca01972d80aac435e057c4fe20ef8b51 Author: Saad Ur Rahman AuthorDate: Thu Jun 24 20:35:22 2021 +0800 Fixed `ldcache.parse` to handle excess tail data in Ubuntu **_[MESOS-10224](https://issues.apache.org/jira/browse/MESOS-10224)_** There is excess data on the tail end of the `ld.so.cache` file on `Ubuntu 21.04`. With this fix the `data` pointer check will not fail if it falls short of the end of the cache's buffer. This closes #394 --- src/linux/ldcache.cpp | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/linux/ldcache.cpp b/src/linux/ldcache.cpp index 5a3b2f5..9c719f0 100644 --- a/src/linux/ldcache.cpp +++ b/src/linux/ldcache.cpp @@ -222,16 +222,16 @@ Try> parse(const string& path) // Adjust the pointer to add on the additional size of the strings // contained in the string table. At this point, 'data' should - // point to an address just beyond the end of the file. + // point to an address just inside or beyond the end of the file. data += headerNew->stringsLength; - if ((size_t)(data - buffer->data()) != buffer->size()) { + if ((size_t)(data - buffer->data()) > buffer->size()) { return Error("Invalid format"); } - // Make sure the very last character in the buffer is a '\0'. + // Make sure the prior character to the data pointer is a '\0'. // This way, no matter what strings we index in the string // table, we know they will never run beyond the end of the - // file buffer when extracting them. + // useful data in the buffer when extracting them. if (*(data - 1) != '\0') { return Error("Invalid format"); }
[mesos] branch master updated: Backported fix for picojson -Wparentheses warning with recent GCC.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 2df3409 Backported fix for picojson -Wparentheses warning with recent GCC. 2df3409 is described below commit 2df3409b7eef3caa495f071a0a26f07a9d08327e Author: Charles-Francois Natali AuthorDate: Sun Jun 13 21:48:53 2021 +0800 Backported fix for picojson -Wparentheses warning with recent GCC. This closes #392 --- 3rdparty/picojson-1.3.0.patch | 25 + 1 file changed, 25 insertions(+) diff --git a/3rdparty/picojson-1.3.0.patch b/3rdparty/picojson-1.3.0.patch index 5d1716a..d3219d6 100644 --- a/3rdparty/picojson-1.3.0.patch +++ b/3rdparty/picojson-1.3.0.patch @@ -204,3 +204,28 @@ index ed9656d..b04ed59 100644 } -- 2.17.1 +From 62b52d4c43531cfb285c2c72c785485b84911867 Mon Sep 17 00:00:00 2001 +From: Charles-Francois Natali +Date: Sat, 12 Jun 2021 18:50:18 +0100 +Subject: [PATCH] Backported fix for -Wparentheses warning with recent GCC + versions. + +--- + picojson.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/picojson.h b/picojson.h +index 48bb64e..ae4fda6 100644 +--- a/picojson.h b/picojson.h +@@ -301,7 +301,7 @@ namespace picojson { + GET(array, *u_.array_) + GET(object, *u_.object_) + #ifdef PICOJSON_USE_INT64 +- GET(double, (type_ == int64_type && (const_cast(this)->type_ = number_type, const_cast(this)->u_.number_ = u_.int64_), u_.number_)) ++ GET(double, (type_ == int64_type && (const_cast(this)->type_ = number_type, (const_cast(this)->u_.number_ = u_.int64_)), u_.number_)) + GET(int64_t, u_.int64_) + #else + GET(double, u_.number_) +-- +2.30.2
[mesos] branch master updated: Backported a boost-mpl commit to ignore GCC's -Wparentheses.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 7141572 Backported a boost-mpl commit to ignore GCC's -Wparentheses. 7141572 is described below commit 7141572d64cc43d3aafe2b4f5de7492cc0803b78 Author: Charles-Francois Natali AuthorDate: Tue Jun 8 22:14:02 2021 +0800 Backported a boost-mpl commit to ignore GCC's -Wparentheses. In order to allow us to build with `-Werror`. Since boost is quite a large dependency and the change is so small, it felt much simpler and less risky than updating boost just to silence this warning. See https://github.com/boostorg/mpl/pull/34 for the upstream fix. This together with https://github.com/apache/mesos/pull/392 allows to build with `-Werror` using this somewhat recent gcc: ``` gcc (Debian 10.2.1-6) 10.2.1 20210110 ``` @asekretenko @qianzhangxa This closes #393 --- 3rdparty/boost-1.65.0.patch | 46 + 1 file changed, 46 insertions(+) diff --git a/3rdparty/boost-1.65.0.patch b/3rdparty/boost-1.65.0.patch index 75be3bc..a351fad 100644 --- a/3rdparty/boost-1.65.0.patch +++ b/3rdparty/boost-1.65.0.patch @@ -16,3 +16,49 @@ index c0557de71..c06eaea45 100644 -# pragma message("Unknown compiler version - please run the configure tests and report the results") -# endif -#endif +From f48fd09d021db9a28bd7b8452c175897e1af4485 Mon Sep 17 00:00:00 2001 +From: Romain Geissler +Date: Thu, 1 Mar 2018 09:59:33 +0100 +Subject: [PATCH] Ignore gcc 8 warnings. 
+ +--- + boost/mpl/assert.hpp | 15 +-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/boost/mpl/assert.hpp b/boost/mpl/assert.hpp +index 1af1b058..e41b583c 100644 +--- a/boost/mpl/assert.hpp b/boost/mpl/assert.hpp +@@ -184,16 +184,27 @@ template< typename P > struct assert_arg_pred_not + typedef typename assert_arg_pred_impl::type type; + }; + ++#if defined(BOOST_GCC) && BOOST_GCC >= 8 ++#define BOOST_MPL_IGNORE_PARENTHESES_WARNING ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wparentheses" ++#endif ++ + template< typename Pred > +-failed (Pred:: ++failed (Pred:: + assert_arg( void (*)(Pred), typename assert_arg_pred::type ) + ); + + template< typename Pred > +-failed (boost::mpl::not_:: ++failed (boost::mpl::not_:: + assert_not_arg( void (*)(Pred), typename assert_arg_pred_not::type ) + ); + ++#ifdef BOOST_MPL_IGNORE_PARENTHESES_WARNING ++#undef BOOST_MPL_IGNORE_PARENTHESES_WARNING ++#pragma GCC diagnostic pop ++#endif ++ + template< typename Pred > + AUX778076_ASSERT_ARG(assert) + assert_arg( void (*)(Pred), typename assert_arg_pred_not::type ); +-- +2.30.2 +
[mesos] branch master updated: Fixed parsing of `perf` output on some locales.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 7841fcc Fixed parsing of `perf` output on some locales. 7841fcc is described below commit 7841fcc848ebaac5de43cd4cccf1c243a3cdff56 Author: Charles-Francois Natali AuthorDate: Fri Jun 4 22:17:52 2021 +0800 Fixed parsing of `perf` output on some locales. If the locale is such that `LC_NUMERIC` uses the comma ',' as decimal separator, parsing won't work - because of unexpected number of fields and floating points format - so make sure it's set to `C`. Example: ``` [ RUN ] CgroupsAnyHierarchyWithPerfEventTest.ROOT_CGROUPS_PERF_PerfTest ../../src/tests/containerizer/cgroups_tests.cpp:1024: Failure (statistics).failure(): Failed to parse perf sample: Failed to parse perf sample line '6376827291,,cycles,mesos_test,2011741096,100,00,3,GHz': Unexpected number of fields (9) [ FAILED ] CgroupsAnyHierarchyWithPerfEventTest.ROOT_CGROUPS_PERF_PerfTest (2157 ms) ``` Standalone reproducer, using '/' as separator for readability: ``` root@thinkpad:~# LC_NUMERIC=fr_FR.UTF-8 perf stat --field-separator "/" -- true 0,31/msec/task-clock/306721/100,00/0/CPUs utilized 0//context-switches/306721/100,00/0/K/sec 0//cpu-migrations/306721/100,00/0/K/sec 44//page-faults/306721/100,00/0/M/sec 788234//cycles/311478/100,00/2/GHz 538077//instructions/311478/100,00/0/insn per cycle 106749//branches/311478/100,00/348/M/sec 4556//branch-misses/311478/100,00/4/of all branches ``` This closes #391 --- src/linux/perf.cpp | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/linux/perf.cpp b/src/linux/perf.cpp index dfc4670..a6ead07 100644 --- a/src/linux/perf.cpp +++ b/src/linux/perf.cpp @@ -125,6 +125,12 @@ protected: private: void execute() { +// If the locale is such that `LC_NUMERIC` uses the comma ',' as decimal +// separator, parsing 
won't work - because of unexpected number of fields +// and floating points format - so make sure it's set to `C`. +std::map env = os::environment(); +env["LC_ALL"] = "C"; + // NOTE: The supervisor childhook places perf in its own process group // and will kill the perf process when the parent dies. Try _perf = subprocess( @@ -134,7 +140,7 @@ private: Subprocess::PIPE(), Subprocess::PIPE(), nullptr, -None(), +env, None(), {}, {Subprocess::ChildHook::SUPERVISOR()});
[mesos] branch master updated: Fixed parsing of ld.so.cache on new glibc.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 9598db0 Fixed parsing of ld.so.cache on new glibc. 9598db0 is described below commit 9598db054e50f10d1ff1612741966f679caf441d Author: Charles-Francois Natali AuthorDate: Mon May 31 21:23:29 2021 +0800 Fixed parsing of ld.so.cache on new glibc. Since glibc 2.32, `ld.so.cache` now defaults to the "new" format, instead of the "compat" format which was in use since glibc 2.2 (around 20 years ago). It is now the default on e.g. Debian bullseye, and any recent Linux distribution. The code change adds support for the "new" format along with the existing support for the "compat". Before: ``` root@thinkpad:/home/cf/src/mesos/build# ldconfig -c new root@thinkpad:/home/cf/src/mesos/build# ./bin/mesos-tests.sh --gtest_filter=*Ld* [...] [==] Running 4 tests from 2 test cases. [--] Global test environment set-up. [--] 1 test from LdcacheTest [ RUN ] LdcacheTest.Parse ../../src/tests/ldcache_tests.cpp:43: Failure cache: Invalid format [ FAILED ] LdcacheTest.Parse (0 ms) [--] 1 test from LdcacheTest (0 ms total) [--] 3 tests from Ldd [ RUN ] Ldd.BinSh ../../src/tests/ldd_tests.cpp:43: Failure cache: Invalid format [ FAILED ] Ldd.BinSh (0 ms) [ RUN ] Ldd.EmptyCache [ OK ] Ldd.EmptyCache (1 ms) [ RUN ] Ldd.MissingFile ../../src/tests/ldd_tests.cpp:77: Failure cache: Invalid format [ FAILED ] Ldd.MissingFile (0 ms) [--] 3 tests from Ldd (1 ms total) [--] Global test environment tear-down [==] 4 tests from 2 test cases ran. (8 ms total) [ PASSED ] 1 test. [ FAILED ] 3 tests, listed below: [ FAILED ] LdcacheTest.Parse [ FAILED ] Ldd.BinSh [ FAILED ] Ldd.MissingFile 3 FAILED TESTS ``` After: ``` root@thinkpad:/home/cf/src/mesos/build# ldconfig -c new root@thinkpad:/home/cf/src/mesos/build# ./bin/mesos-tests.sh --gtest_filter=*Ld* [...] 
[==] Running 4 tests from 2 test cases. [--] Global test environment set-up. [--] 1 test from LdcacheTest [ RUN ] LdcacheTest.Parse [ OK ] LdcacheTest.Parse (529 ms) [--] 1 test from LdcacheTest (529 ms total) [--] 3 tests from Ldd [ RUN ] Ldd.BinSh [ OK ] Ldd.BinSh (3 ms) [ RUN ] Ldd.EmptyCache [ OK ] Ldd.EmptyCache (0 ms) [ RUN ] Ldd.MissingFile [ OK ] Ldd.MissingFile (0 ms) [--] 3 tests from Ldd (3 ms total) [--] Global test environment tear-down [==] 4 tests from 2 test cases ran. (541 ms total) [ PASSED ] 4 tests. ``` This closes #384 --- src/linux/ldcache.cpp | 115 +- 1 file changed, 67 insertions(+), 48 deletions(-) diff --git a/src/linux/ldcache.cpp b/src/linux/ldcache.cpp index e933344..5a3b2f5 100644 --- a/src/linux/ldcache.cpp +++ b/src/linux/ldcache.cpp @@ -32,10 +32,11 @@ using std::string; using std::vector; -// There are two formats for ld.so.cache. The pre-glibc format 2.2 -// listed the number of library entries, followed by the entries -// themselves, followed by a string table holding strings pointed -// to by the library entries. This format is summarized below: +// There are three formats for ld.so.cache. The "old" pre-glibc +// format 2.2 listed the number of library entries, followed by +// the entries themselves, followed by a string table holding +// strings pointed to by the library entries. This format is +// summarized below: // // HEADER_MAGIC_OLD // nlibs @@ -46,21 +47,22 @@ using std::vector; // ^ ^ // start of string table end of string table // -// For glibc 2.2 and beyond, a new format was created so that each +// For glibc 2.2 and beyond, a "new" format was created so that each // library entry could hold more meta-data about the libraries they -// reference. To preserve backwards compatibility, the new format was -// embedded in the old format inside its string table (simply moving -// all existing strings further down in the string table). 
This makes -// sense for backwards compatibility because code that could parse the -// old format still works (the offsets for strings pointed to by -// the library entries are just larger now). +// reference. To preserve backwards compatibility, a "compat" format +// was introduced where the new format was embedded in the old +// format inside its st
[mesos] branch master updated: Ignored the directory `/dev/nvidia-caps` when globbing Nvidia GPU devices.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 301902b Ignored the directory `/dev/nvidia-caps` when globbing Nvidia GPU devices. 301902b is described below commit 301902be4f1332799cf3b3242cd29b4907c21c09 Author: Qian Zhang AuthorDate: Sat Oct 10 15:04:57 2020 +0800 Ignored the directory `/dev/nvidia-caps` when globbing Nvidia GPU devices. The directory `/dev/nvidia-caps` was introduced in CUDA 11.0, just ignore it since we only care about the Nvidia GPU device files. Review: https://reviews.apache.org/r/72945 --- src/slave/containerizer/mesos/isolators/gpu/isolator.cpp | 9 + 1 file changed, 9 insertions(+) diff --git a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp index a0be102..99119f9 100644 --- a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp @@ -443,6 +443,15 @@ Future> NvidiaGpuIsolatorProcess::_prepare( } foreach (const string& device, nvidia.get()) { +// The directory `/dev/nvidia-caps` was introduced in CUDA 11.0, just +// ignore it since we only care about the Nvidia GPU device files. +// +// TODO(qianzhang): Figure out how to handle the directory +// `/dev/nvidia-caps` more properly. +if (device == "/dev/nvidia-caps") { + continue; +} + const string devicePath = path::join( devicesDir, strings::remove(device, "/dev/", strings::PREFIX), device);
[mesos] 02/02: Corrected the example of the managed CSI plugin.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 90e5434544da9886cd6f2d87b73e3246292af107 Author: Qian Zhang AuthorDate: Tue Oct 13 09:58:44 2020 +0800 Corrected the example of the managed CSI plugin. Review: https://reviews.apache.org/r/72846 --- docs/configuration/agent.md | 8 +--- src/slave/flags.cpp | 8 +--- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/docs/configuration/agent.md b/docs/configuration/agent.md index 4899202..a8286d1 100644 --- a/docs/configuration/agent.md +++ b/docs/configuration/agent.md @@ -1528,16 +1528,10 @@ Example config files in this directory: "containers": [ { "services": [ -"CONTROLLER_SERVICE", "NODE_SERVICE" ], "command": { -"shell": false, -"value": "managed-plugin", -"arguments": [ - "managed-plugin", - "--endpoint=$(CSI_ENDPOINT)" -] +"value": " --endpoint=$CSI_ENDPOINT" }, "resources": [ {"name": "cpus", "type": "SCALAR", "scalar": {"value": 0.1}}, diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp index 878788c..e66b464 100644 --- a/src/slave/flags.cpp +++ b/src/slave/flags.cpp @@ -129,16 +129,10 @@ mesos::internal::slave::Flags::Flags() " \"containers\": [\n" "{\n" " \"services\": [\n" - "\"CONTROLLER_SERVICE\",\n" "\"NODE_SERVICE\"\n" " ],\n" " \"command\": {\n" - "\"shell\": false,\n" - "\"value\": \"managed-plugin\",\n" - "\"arguments\": [\n" - " \"managed-plugin\",\n" - " \"--endpoint=$(CSI_ENDPOINT)\"\n" - "]\n" + "\"value\": \" --endpoint=$CSI_ENDPOINT\"\n" " },\n" " \"resources\": [\n" "{\"name\": \"cpus\", \"type\": \"SCALAR\", \"scalar\": {\"value\": 0.1}},\n" // NOLINT(whitespace/line_length)
[mesos] 01/02: Added doc for the `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 3e1e0b37d6a30a2c98d1227b4ac754b1d26686f3 Author: Qian Zhang AuthorDate: Wed Sep 9 10:26:52 2020 +0800 Added doc for the `volume/csi` isolator. Review: https://reviews.apache.org/r/72845 --- docs/isolators/csi-volume.md | 339 +++ docs/mesos-containerizer.md | 1 + 2 files changed, 340 insertions(+) diff --git a/docs/isolators/csi-volume.md b/docs/isolators/csi-volume.md new file mode 100644 index 000..1b1a635 --- /dev/null +++ b/docs/isolators/csi-volume.md @@ -0,0 +1,339 @@ +--- +title: Apache Mesos - CSI Volume Support in Mesos Containerizer +layout: documentation +--- + +# Pre-provisioned CSI Volume Support in Mesos Containerizer + +Mesos 1.11.0 adds pre-provisioned CSI volume support to the +[MesosContainerizer](../mesos-containerizer.md) (a.k.a., the universal +containerizer) by introducing the new `volume/csi` isolator. + +This document describes the motivation and the configuration steps for enabling +the `volume/csi` isolator, and required framework changes. + +## Table of Contents +- [Motivation](#motivation) +- [How does it work?](#how-does-it-work) +- [Configuration](#configuration) + - [Pre-conditions](#pre-conditions) + - [Configuring CSI Volume Isolator](#configure-csi-volume-isolator) + - [Enabling frameworks to use CSI volumes](#enable-frameworks) +- [Volume Protobuf](#volume-protobuf) +- [Example](#example) + +## Motivation + +[Container Storage Interface](https://github.com/container-storage-interface/spec) +(CSI) is a specification that defines a common set of APIs for all interactions +between the storage vendors and the container orchestration platforms. Building +CSI support allows Mesos to make use of the quickly-growing CSI ecosystem. 
+ +We already have a [solution](../csi.md) to support CSI introduced in the Mesos +1.5.0 release, but that solution has a limitation: it requires CSI plugins to +implement the [ListVolumes](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#listvolumes) +and [GetCapacity](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#getcapacity) +APIs so that the external storage can be modeled as Mesos raw disk resources and +then offered to frameworks. However there are a lot of 3rd party CSI plugins that +do not implement those two APIs. + +Mesos 1.11.0 provides a more generic way to support 3rd party CSI plugins so +that Mesos can work with a broader external storage ecosystem and we will benefit +from continued development of the community CSI plugins. + +## How does it work? + +The `volume/csi` isolator interacts with CSI plugins via the plugin's gRPC +endpoint. + +When a new task with CSI volumes is launched, the `volume/csi` isolator will +call the CSI plugin to publish the specified CSI volumes onto the agent host +and then mount them onto the task container. When the task terminates, the +`volume/csi` isolator will call the CSI plugin to unpublish the specified CSI +volumes. + +Currently the `volume/csi` isolator will only call the CSI plugin's [node service](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#node-service-rpc) but not [controller service](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#controller-service-rpc), which means: + +- We only support pre-provisioned CSI volumes but not dynamic CSI volume + provisioning, so operators need to create the CSI volumes explicitly and + provide the volume info (e.g. volume ID, context, etc.) to frameworks so + that frameworks can use the volumes in their tasks. 
+ +- We do not support the CSI volumes that require the controller service to + publish to a node ([ControllerPublishVolume](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#controllerpublishvolume)) prior to the node service publishing on the node + ([NodePublishVolume](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#nodepublishvolume)). + +## Configuration + +To use the `volume/csi` isolator, there are certain actions required by +operators and framework developers. In this section we list the steps +required by the operator to configure the `volume/csi` isolator and the steps +required by framework developers to specify CSI volumes in their tasks. + +### Pre-conditions + +- Explicitly create the CSI volumes that are going to be accessed by Mesos + tasks. For some CSI plugins (e.g. [NFS](https://github.com/kubernetes-csi/csi-driver-nfs)), + they do not implement the [CreateVolume](https://github.com/container-storage-interface/spec/blob/v1.3.0/spec.md#createvolume) + API, so operators do not need to create the volume explicitly in this case. + +### Configuring the CSI Volume Isolator + +In order to configure the `volume/csi` iso
[mesos] branch master updated (c28fd3a -> 90e5434)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from c28fd3a Re-added the obsolete `updateFramework` signature into libmesos-java.so. new 3e1e0b3 Added doc for the `volume/csi` isolator. new 90e5434 Corrected the example of the managed CSI plugin. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/configuration/agent.md | 8 +- docs/isolators/csi-volume.md | 339 +++ docs/mesos-containerizer.md | 1 + src/slave/flags.cpp | 8 +- 4 files changed, 342 insertions(+), 14 deletions(-) create mode 100644 docs/isolators/csi-volume.md
[mesos] branch master updated: Inferred CSI volume's `readonly` field from volume mode.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 8700dd8 Inferred CSI volume's `readonly` field from volume mode. 8700dd8 is described below commit 8700dd8d5ece658804d7b7a40863800dcc5c72bc Author: Qian Zhang AuthorDate: Sat Sep 19 11:11:04 2020 +0800 Inferred CSI volume's `readonly` field from volume mode. Review: https://reviews.apache.org/r/72888 --- include/mesos/mesos.proto | 7 ++-- include/mesos/v1/mesos.proto | 7 ++-- .../mesos/isolators/volume/csi/isolator.cpp| 18 +++--- .../mesos/isolators/volume/csi/isolator.hpp| 5 ++- src/slave/csi_server.cpp | 42 -- src/slave/csi_server.hpp | 3 +- .../containerizer/volume_csi_isolator_tests.cpp| 39 +++- src/tests/mesos.hpp| 41 +++-- 8 files changed, 65 insertions(+), 97 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index a100844..a51d6fa 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -3205,16 +3205,15 @@ message Volume { message StaticProvisioning { required string volume_id = 1; required VolumeCapability volume_capability = 2; -optional bool readonly = 3; // The secrets needed for staging/publishing the volume, e.g.: // { // "username": {"type": REFERENCE, "reference": {"name": "U_SECRET"}}, // "password": {"type": REFERENCE, "reference": {"name": "P_SECRET"}} // } -map node_stage_secrets = 4; -map node_publish_secrets = 5; -map volume_context = 6; +map node_stage_secrets = 3; +map node_publish_secrets = 4; +map volume_context = 5; } optional StaticProvisioning static_provisioning = 2; diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index 09973ab..ad7092e 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -3194,16 +3194,15 @@ message Volume { message StaticProvisioning { required string volume_id = 1; required 
VolumeCapability volume_capability = 2; -optional bool readonly = 3; // The secrets needed for staging/publishing the volume, e.g.: // { // "username": {"type": REFERENCE, "reference": {"name": "U_SECRET"}}, // "password": {"type": REFERENCE, "reference": {"name": "P_SECRET"}} // } -map node_stage_secrets = 4; -map node_publish_secrets = 5; -map volume_context = 6; +map node_stage_secrets = 3; +map node_publish_secrets = 4; +map volume_context = 5; } optional StaticProvisioning static_provisioning = 2; diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index 79a6860..8180b19 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -273,13 +273,6 @@ Future> VolumeCSIIsolatorProcess::prepare( const AccessMode& accessMode = csiVolume.static_provisioning().volume_capability().access_mode(); -if (csiVolume.static_provisioning().readonly() && -_volume.mode() == Volume::RW) { - return Failure( - "Cannot publish the volume '" + volumeId + - "' in read-only mode but use it in read-write mode"); -} - if ((accessMode.mode() == AccessMode::SINGLE_NODE_READER_ONLY || accessMode.mode() == AccessMode::MULTI_NODE_READER_ONLY) && _volume.mode() == Volume::RW) { @@ -355,10 +348,9 @@ Future> VolumeCSIIsolatorProcess::prepare( } Mount mount; -mount.csiVolume = csiVolume; -mount.volume = volume; +mount.volume = _volume; +mount.csiVolume = volume; mount.target = target; -mount.volumeMode = _volume.mode(); mounts.push_back(mount); volumeSet.insert(volume); @@ -390,7 +382,7 @@ Future> VolumeCSIIsolatorProcess::prepare( vector> futures; futures.reserve(mounts.size()); foreach (const Mount& mount, mounts) { -futures.push_back(csiServer->publishVolume(mount.csiVolume)); +futures.push_back(csiServer->publishVolume(mount.volume)); } return await(futures) @@ -449,7 +441,7 @@ Future> 
VolumeCSIIsolatorProcess::_prepare( c
[mesos] branch master updated: Moved the `volume/csi` isolator's root dir under work dir.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new a16f343 Moved the `volume/csi` isolator's root dir under work dir. a16f343 is described below commit a16f3439dca13982bb4a2b9190c24aaf4eb73b0e Author: Qian Zhang AuthorDate: Tue Sep 1 20:58:35 2020 +0800 Moved the `volume/csi` isolator's root dir under work dir. The `volume/csi` isolator needs to checkpoint CSI volume state under work dir rather than runtime dir to be consistent with what volume manager does. Otherwise after agent host is rebooted, volume manager may publish some volumes during recovery, and those volumes will never get chance to be unpublished since the `volume/csi` isolator does not know those volumes at all (the contents in runtime dir will be gone after reboot). Review: https://reviews.apache.org/r/72829 --- src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp | 2 +- src/slave/containerizer/mesos/isolators/volume/csi/paths.hpp| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index d5d8835..79a6860 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -69,7 +69,7 @@ Try VolumeCSIIsolatorProcess::create( return Error("No CSI server is provided"); } - const string csiRootDir = path::join(flags.runtime_dir, csi::paths::CSI_DIR); + const string csiRootDir = path::join(flags.work_dir, csi::paths::CSI_DIR); // Create the CSI volume information root directory if it does not exist. 
Try mkdir = os::mkdir(csiRootDir); diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/paths.hpp b/src/slave/containerizer/mesos/isolators/volume/csi/paths.hpp index 5b4a4ee..d551809 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/paths.hpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/paths.hpp @@ -29,7 +29,7 @@ namespace paths { // The root directory where we keep the information of CSI volumes that each // container uses. The layout is as follows: -// //isolators/volume/csi/ +// //isolators/volume/csi/ // |-- / // | |-- volumes // |-- /
[mesos] 02/02: Relaxed unknown volume check when unpublishing volumes.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit ea4099028cfe93e1e2fd80e4d30e03057ec27df1 Author: Qian Zhang AuthorDate: Sun Aug 30 10:23:06 2020 +0800 Relaxed unknown volume check when unpublishing volumes. Review: https://reviews.apache.org/r/72820 --- src/csi/v0_volume_manager.cpp | 5 - src/csi/v1_volume_manager.cpp | 5 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/csi/v0_volume_manager.cpp b/src/csi/v0_volume_manager.cpp index 8ba6100..3d5caa8 100644 --- a/src/csi/v0_volume_manager.cpp +++ b/src/csi/v0_volume_manager.cpp @@ -498,7 +498,10 @@ Future VolumeManagerProcess::publishVolume( Future VolumeManagerProcess::unpublishVolume(const string& volumeId) { if (!volumes.contains(volumeId)) { -return Failure("Cannot unpublish unknown volume '" + volumeId + "'"); +LOG(WARNING) << "Ignoring unpublish request for unknown volume '" + << volumeId << "'"; + +return Nothing(); } VolumeData& volume = volumes.at(volumeId); diff --git a/src/csi/v1_volume_manager.cpp b/src/csi/v1_volume_manager.cpp index 29ae821..c50fda2 100644 --- a/src/csi/v1_volume_manager.cpp +++ b/src/csi/v1_volume_manager.cpp @@ -519,7 +519,10 @@ Future VolumeManagerProcess::publishVolume( Future VolumeManagerProcess::unpublishVolume(const string& volumeId) { if (!volumes.contains(volumeId)) { -return Failure("Cannot unpublish unknown volume '" + volumeId + "'"); +LOG(WARNING) << "Ignoring unpublish request for unknown volume '" + << volumeId << "'"; + +return Nothing(); } VolumeData& volume = volumes.at(volumeId);
[mesos] 01/02: Enabled CSI volume access for non-root users.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 17f28563488ddaeb2daa60b53bd8dc19e25cddef Author: Qian Zhang AuthorDate: Wed Aug 26 10:33:26 2020 +0800 Enabled CSI volume access for non-root users. Review: https://reviews.apache.org/r/72804 --- .../mesos/isolators/volume/csi/isolator.cpp| 40 ++ .../mesos/isolators/volume/csi/isolator.hpp| 2 ++ 2 files changed, 42 insertions(+) diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index 02ef1f2..d5d8835 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -356,6 +356,7 @@ Future> VolumeCSIIsolatorProcess::prepare( Mount mount; mount.csiVolume = csiVolume; +mount.volume = volume; mount.target = target; mount.volumeMode = _volume.mode(); @@ -398,6 +399,9 @@ Future> VolumeCSIIsolatorProcess::prepare( &VolumeCSIIsolatorProcess::_prepare, containerId, mounts, +containerConfig.has_user() + ? containerConfig.user() + : Option::none(), lambda::_1)); } @@ -405,6 +409,7 @@ Future> VolumeCSIIsolatorProcess::prepare( Future> VolumeCSIIsolatorProcess::_prepare( const ContainerID& containerId, const vector& mounts, +const Option& user, const vector>& futures) { @@ -432,6 +437,41 @@ Future> VolumeCSIIsolatorProcess::_prepare( const string& source = sources[i]; const Mount& mount = mounts[i]; +if (user.isSome() && user.get() != "root") { + bool isVolumeInUse = false; + + // Check if the volume is currently used by another container. + foreachpair (const ContainerID& _containerId, + const Owned& info, + infos) { +// Skip self. 
+if (_containerId == containerId) { + continue; +} + +if (info->volumes.contains(mount.volume)) { + isVolumeInUse = true; + break; +} + } + + if (!isVolumeInUse) { +LOG(INFO) << "Changing the ownership of the CSI volume at '" << source + << "' to user '" << user.get() << "' for container " + << containerId; + +Try chown = os::chown(user.get(), source, false); +if (chown.isError()) { + return Failure( + "Failed to set '" + user.get() + "' as the owner of the " + "CSI volume at '" + source + "': " + chown.error()); +} + } else { +LOG(INFO) << "Leaving the ownership of the CSI volume at '" + << source << "' unchanged because it is in use"; + } +} + LOG(INFO) << "Mounting CSI volume mount point '" << source << "' to '" << mount.target << "' for container " << containerId; diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp index 373b629..4349acd 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp @@ -68,6 +68,7 @@ private: struct Mount { Volume::Source::CSIVolume csiVolume; +CSIVolume volume; std::string target; Volume::Mode volumeMode; }; @@ -92,6 +93,7 @@ private: process::Future> _prepare( const ContainerID& containerId, const std::vector& mounts, + const Option& user, const std::vector>& futures); process::Future _cleanup(
[mesos] branch master updated (a1bfa74 -> ea40990)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from a1bfa74 Fixed broken authorization in the CSI server. new 17f2856 Enabled CSI volume access for non-root users. new ea40990 Relaxed unknown volume check when unpublishing volumes. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: src/csi/v0_volume_manager.cpp | 5 ++- src/csi/v1_volume_manager.cpp | 5 ++- .../mesos/isolators/volume/csi/isolator.cpp| 40 ++ .../mesos/isolators/volume/csi/isolator.hpp| 2 ++ 4 files changed, 50 insertions(+), 2 deletions(-)
[mesos] branch master updated (22ecccc -> d8647b0)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from 22ecccc Fixed a bug in CSI server initialization. new 2d2265d Introduced the `CSIPluginInfo.target_path_exists` field. new d8647b0 Refactored state recovery in `volume/csi` isolator. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: include/mesos/mesos.proto | 7 include/mesos/v1/mesos.proto | 7 src/csi/v1_volume_manager.cpp | 39 -- .../mesos/isolators/volume/csi/isolator.cpp| 18 ++ 4 files changed, 45 insertions(+), 26 deletions(-)
[mesos] 02/02: Refactored state recovery in `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit d8647b018fbcfc38ccf0e39bfeae9118e275068f Author: Qian Zhang AuthorDate: Thu Aug 20 17:09:36 2020 +0800 Refactored state recovery in `volume/csi` isolator. Read the checkpointed CSI volume state directly in protobuf message way. Review: https://reviews.apache.org/r/72789 --- .../mesos/isolators/volume/csi/isolator.cpp| 18 +++--- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index 535974b..02ef1f2 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -198,14 +198,12 @@ Try VolumeCSIIsolatorProcess::recoverContainer( return Nothing(); } - Result read = state::read(volumesPath); + Result read = state::read(volumesPath); if (read.isError()) { return Error( "Failed to read the CSI volumes checkpoint file '" + volumesPath + "': " + read.error()); - } - - if (read->empty()) { + } else if (read.isNone()) { // This could happen if agent is hard rebooted after the checkpoint file is // created but before the data is synced on disk. 
LOG(WARNING) << "The CSI volumes checkpointed at '" << volumesPath @@ -220,18 +218,8 @@ Try VolumeCSIIsolatorProcess::recoverContainer( return Nothing(); } - Try json = JSON::parse(read.get()); - if (json.isError()) { -return Error("JSON parse failed: " + json.error()); - } - - Try parse = ::protobuf::parse(json.get()); - if (parse.isError()) { -return Error("Protobuf parse failed: " + parse.error()); - } - hashset volumes; - foreach (const CSIVolume& volume, parse->volumes()) { + foreach (const CSIVolume& volume, read->volumes()) { VLOG(1) << "Recovering CSI volume with plugin '" << volume.plugin_name() << "' and ID '" << volume.id() << "' for container " << containerId;
[mesos] 01/02: Introduced the `CSIPluginInfo.target_path_exists` field.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 2d2265de7df7801612fc2f104f9c8f455a97a1fd Author: Qian Zhang AuthorDate: Thu Aug 20 17:08:32 2020 +0800 Introduced the `CSIPluginInfo.target_path_exists` field. Review: https://reviews.apache.org/r/72788 --- include/mesos/mesos.proto | 7 +++ include/mesos/v1/mesos.proto | 7 +++ src/csi/v1_volume_manager.cpp | 39 --- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 661f746..a100844 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -1140,6 +1140,13 @@ message CSIPluginInfo { // Each volume will be published by the CSI plugin at a sub-directory // under this path. optional string target_path_root = 5; + + // For some CSI plugins which implement CSI v1 spec, they expect the target + // path is an existing path which is actually not CSI v1 spec compliant. In + // such case this field should be set to `true` as a work around for those + // plugins. For the CSI plugins which implement CSI v0 spec, this field will + // be just ignored. + optional bool target_path_exists = 6; } diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index ffe45c3..09973ab 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -1128,6 +1128,13 @@ message CSIPluginInfo { // Each volume will be published by the CSI plugin at a sub-directory // under this path. optional string target_path_root = 5; + + // For some CSI plugins which implement CSI v1 spec, they expect the target + // path is an existing path which is actually not CSI v1 spec compliant. In + // such case this field should be set to `true` as a work around for those + // plugins. For the CSI plugins which implement CSI v0 spec, this field will + // be just ignored. 
+ optional bool target_path_exists = 6; } diff --git a/src/csi/v1_volume_manager.cpp b/src/csi/v1_volume_manager.cpp index 1a1b97c..29ae821 100644 --- a/src/csi/v1_volume_manager.cpp +++ b/src/csi/v1_volume_manager.cpp @@ -952,16 +952,29 @@ Future VolumeManagerProcess::_publishVolume(const string& volumeId) const string targetPath = paths::getMountTargetPath(mountRootDir, volumeId); - // Ensure the parent directory of the target path exists. The target path - // itself will be created by the plugin. - // - // NOTE: The target path will be removed by the plugin as well, and The parent - // directory of the target path will be cleaned up during volume removal. - Try mkdir = os::mkdir(Path(targetPath).dirname()); - if (mkdir.isError()) { -return Failure( -"Failed to create parent directory of target path '" + targetPath + -"': " + mkdir.error()); + if (info.target_path_exists()) { +// For some CSI plugins, they expect the target path is an existing path +// rather than creating the target path. So here we create the target path +// for such CSI plugins. +Try mkdir = os::mkdir(targetPath); +if (mkdir.isError()) { + return Failure( + "Failed to create the target path '" + targetPath + + "': " + mkdir.error()); +} + } else { +// Ensure the parent directory of the target path exists. The +// target path itself will be created by the plugin. +// +// NOTE: The target path will be removed by the plugin as well, +// and the parent directory of the target path will be cleaned +// up during volume removal. 
+Try mkdir = os::mkdir(Path(targetPath).dirname()); +if (mkdir.isError()) { + return Failure( + "Failed to create parent directory of target path '" + targetPath + + "': " + mkdir.error()); +} } if (volumeState.state() == VolumeState::VOL_READY) { @@ -1244,7 +1257,11 @@ Future VolumeManagerProcess::__unpublishVolume(const string& volumeId) return call(NODE_SERVICE, &Client::nodeUnpublishVolume, std::move(request)) .then(process::defer(self(), [this, volumeId, targetPath]() -> Future { - if (os::exists(targetPath)) { + // For the CSI plugins which expect the target path is an existing path, + // they do not remove the target path as part of the `NodeUnpublishVolume` + // operation. So here we should not verify the target path is already + // removed by such CSI plugins. + if (!info.target_path_exists() && os::exists(targetPath)) { return Failure("Target path '" + targetPath + "' not removed"); }
[mesos] branch master updated: Exposed Mesos agent ID to managed CSI plugins.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new b1b3ed0 Exposed Mesos agent ID to managed CSI plugins. b1b3ed0 is described below commit b1b3ed096e31260a25ba74927786db3f92afb9d3 Author: Qian Zhang AuthorDate: Wed Aug 12 17:25:11 2020 +0800 Exposed Mesos agent ID to managed CSI plugins. Review: https://reviews.apache.org/r/72759 --- src/csi/service_manager.cpp| 22 ++ src/csi/service_manager.hpp| 1 + src/resource_provider/storage/provider.cpp | 1 + src/slave/csi_server.cpp | 2 ++ 4 files changed, 26 insertions(+) diff --git a/src/csi/service_manager.cpp b/src/csi/service_manager.cpp index 7a8d8e5..5b298a1 100644 --- a/src/csi/service_manager.cpp +++ b/src/csi/service_manager.cpp @@ -128,6 +128,7 @@ class ServiceManagerProcess : public Process { public: ServiceManagerProcess( + const SlaveID& _agentId, const http::URL& _agentUrl, const string& _rootDir, const CSIPluginInfo& _info, @@ -174,6 +175,7 @@ private: // a new container. 
Future getEndpoint(const ContainerID& containerId); + const SlaveID agentId; const http::URL agentUrl; const string rootDir; const CSIPluginInfo info; @@ -201,6 +203,7 @@ private: ServiceManagerProcess::ServiceManagerProcess( +const SlaveID& _agentId, const http::URL& _agentUrl, const string& _rootDir, const CSIPluginInfo& _info, @@ -210,6 +213,7 @@ ServiceManagerProcess::ServiceManagerProcess( const Runtime& _runtime, Metrics* _metrics) : ProcessBase(process::ID::generate("csi-service-manager")), +agentId(_agentId), agentUrl(_agentUrl), rootDir(_rootDir), info(_info), @@ -252,6 +256,7 @@ ServiceManagerProcess::ServiceManagerProcess( const Runtime& _runtime, Metrics* _metrics) : ProcessBase(process::ID::generate("csi-service-manager")), +agentId(), agentUrl(), rootDir(), info(_info), @@ -723,9 +728,24 @@ Future ServiceManagerProcess::getEndpoint( const string endpoint = "unix://" + endpointPath.get(); Environment::Variable* endpoint_ = commandInfo.mutable_environment()->add_variables(); + endpoint_->set_name("CSI_ENDPOINT"); endpoint_->set_value(endpoint); + // For some CSI Plugins (like NFS CSI plugin), their node service need + // a node ID specified by container orchestrator, so here we expose agent + // ID to the plugins, they can use that as the node ID. 
+ if (config->services().end() != std::find( + config->services().begin(), + config->services().end(), + NODE_SERVICE)) { +Environment::Variable* nodeId = + commandInfo.mutable_environment()->add_variables(); + +nodeId->set_name("MESOS_AGENT_ID"); +nodeId->set_value(stringify(agentId)); + } + ContainerInfo containerInfo; if (config->has_container()) { @@ -839,6 +859,7 @@ Future ServiceManagerProcess::getEndpoint( ServiceManager::ServiceManager( +const SlaveID& agentId, const http::URL& agentUrl, const string& rootDir, const CSIPluginInfo& info, @@ -848,6 +869,7 @@ ServiceManager::ServiceManager( const Runtime& runtime, Metrics* metrics) : process(new ServiceManagerProcess( +agentId, agentUrl, rootDir, info, diff --git a/src/csi/service_manager.hpp b/src/csi/service_manager.hpp index 76a80fb..24356f8 100644 --- a/src/csi/service_manager.hpp +++ b/src/csi/service_manager.hpp @@ -54,6 +54,7 @@ public: // This is for the managed CSI plugins which will be // launched as standalone containers. ServiceManager( + const SlaveID& agentId, const process::http::URL& agentUrl, const std::string& rootDir, const CSIPluginInfo& info, diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp index 0a8dc26..1a202af 100644 --- a/src/resource_provider/storage/provider.cpp +++ b/src/resource_provider/storage/provider.cpp @@ -567,6 +567,7 @@ Future StorageLocalResourceProviderProcess::recover() CHECK_EQ(RECOVERING, state); serviceManager.reset(new ServiceManager( + slaveId, extractParentEndpoint(url), slave::paths::getCsiRootDir(workDir), info.storage().plugin(), diff --git a/src/slave/csi_server.cpp b/src/slave/csi_server.cpp index b435d5d..2ba4f22 100644 --- a/src/slave/csi_server.cpp +++ b/src/slave/csi_server.cpp @@ -125,6 +125,7 @@ private: SecretResolver* secretResolver; Option authToken; hashmap pluginConfigs; + Option agentId; }; @@ -172,6 +173,7 @@ Future CSIServerProcess:
[mesos] branch master updated (014431e -> 1fb79fe)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from 014431e Updated volume manager to support user specified target path root. new a3e8fd8 Implemented the framework and `create` method of `volume/csi` isolator. new 83ae449 Implemented the `prepare` method of `volume/csi` isolator. new 17db3a9 Implemented the `cleanup` method of `volume/csi` isolator. new 64cd6b8 Implemented the `recover` method of `volume/csi` isolator. new 1fb79fe Enabled the `volume/csi` isolator in `MesosContainerizer`. The 5 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: include/mesos/mesos.proto | 8 +- include/mesos/v1/mesos.proto | 8 +- src/CMakeLists.txt | 3 + src/Makefile.am| 8 + src/common/validation.cpp | 12 + src/slave/containerizer/containerizer.cpp | 7 +- src/slave/containerizer/containerizer.hpp | 4 +- src/slave/containerizer/mesos/containerizer.cpp| 10 +- src/slave/containerizer/mesos/containerizer.hpp| 4 +- .../mesos/isolators/volume/csi/isolator.cpp| 546 + .../posix.hpp => volume/csi/isolator.hpp} | 86 ++-- .../{docker/volume => volume/csi}/paths.cpp| 17 +- .../{docker/volume => volume/csi}/paths.hpp| 24 +- .../{docker/volume => volume/csi}/state.hpp| 22 +- .../{docker/volume => volume/csi}/state.proto | 12 +- 15 files changed, 695 insertions(+), 76 deletions(-) create mode 100644 src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp copy src/slave/containerizer/mesos/isolators/{filesystem/posix.hpp => volume/csi/isolator.hpp} (50%) copy src/slave/containerizer/mesos/isolators/{docker/volume => volume/csi}/paths.cpp (74%) copy src/slave/containerizer/mesos/isolators/{docker/volume => volume/csi}/paths.hpp (75%) copy 
src/slave/containerizer/mesos/isolators/{docker/volume => volume/csi}/state.hpp (65%) copy src/slave/containerizer/mesos/isolators/{docker/volume => volume/csi}/state.proto (83%)
[mesos] 02/05: Implemented the `prepare` method of `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 83ae449fa396d92e708e2ba8bb6f1312eb0fd5dd Author: Qian Zhang AuthorDate: Tue Aug 4 15:44:24 2020 +0800 Implemented the `prepare` method of `volume/csi` isolator. Review: https://reviews.apache.org/r/72733 --- include/mesos/mesos.proto | 8 +- include/mesos/v1/mesos.proto | 8 +- src/CMakeLists.txt | 1 + src/Makefile.am| 4 + src/common/validation.cpp | 12 ++ .../mesos/isolators/volume/csi/isolator.cpp| 214 - .../mesos/isolators/volume/csi/isolator.hpp| 24 +++ .../mesos/isolators/volume/csi/state.hpp | 61 ++ .../mesos/isolators/volume/csi/state.proto | 29 +++ 9 files changed, 352 insertions(+), 9 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 0f91d88..661f746 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -3055,10 +3055,10 @@ message Volume { // TODO(gyliu513): Make this as `optional` after deprecation cycle of 1.0. required Mode mode = 3; - // Path pointing to a directory or file in the container. If the - // path is a relative path, it is relative to the container work - // directory. If the path is an absolute path, that path must - // already exist. + // Path pointing to a directory or file in the container. If the path + // is a relative path, it is relative to the container work directory. + // If the path is an absolute path and the container does not have its + // own rootfs, that path must already exist in the agent host rootfs. required string container_path = 1; // The following specifies the source of this volume. At most one of diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index f25db8a..ffe45c3 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -3044,10 +3044,10 @@ message Volume { // TODO(gyliu513): Make this as `optional` after deprecation cycle of 1.0. 
required Mode mode = 3; - // Path pointing to a directory or file in the container. If the - // path is a relative path, it is relative to the container work - // directory. If the path is an absolute path, that path must - // already exist. + // Path pointing to a directory or file in the container. If the path + // is a relative path, it is relative to the container work directory. + // If the path is an absolute path and the container does not have its + // own rootfs, that path must already exist in the agent host rootfs. required string container_path = 1; // The following specifies the source of this volume. At most one of diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f3abdbf..a976dc1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,7 @@ if (NOT WIN32) PROTOC_GENERATE(INTERNAL TARGET csi/state) PROTOC_GENERATE(INTERNAL TARGET resource_provider/storage/disk_profile) PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/isolators/docker/volume/state) + PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/isolators/volume/csi/state) PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/provisioner/docker/message) PROTOC_GENERATE(INTERNAL TARGET slave/volume_gid_manager/state) endif () diff --git a/src/Makefile.am b/src/Makefile.am index 70e844d..6d68ed0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -414,6 +414,8 @@ CXX_PROTOS += \ slave/containerizer/mesos/isolators/docker/volume/state.pb.h \ slave/containerizer/mesos/isolators/network/cni/spec.pb.cc \ slave/containerizer/mesos/isolators/network/cni/spec.pb.h\ + slave/containerizer/mesos/isolators/volume/csi/state.pb.cc \ + slave/containerizer/mesos/isolators/volume/csi/state.pb.h\ slave/volume_gid_manager/state.pb.cc \ slave/volume_gid_manager/state.pb.h @@ -1027,6 +1029,7 @@ libmesos_no_3rdparty_la_SOURCES = \ slave/containerizer/mesos/provisioner/docker/message.proto \ slave/containerizer/mesos/isolators/docker/volume/state.proto \ 
slave/containerizer/mesos/isolators/network/cni/spec.proto \ + slave/containerizer/mesos/isolators/volume/csi/state.proto \ slave/volume_gid_manager/state.proto # TODO(tillt): Remove authentication/cram_md5/* which will enable us to @@ -1253,6 +1256,7 @@ libmesos_no_3rdparty_la_SOURCES += \ slave/containerizer/mesos/isolators/volume/sandbox_path.hpp \ slave/containerizer/mesos/isolators/volume/csi
[mesos] 04/05: Implemented the `recover` method of `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 64cd6b82786de0bde3ddaaf221b5ab9a106c87c7 Author: Qian Zhang AuthorDate: Sat Aug 8 23:53:31 2020 +0800 Implemented the `recover` method of `volume/csi` isolator. Review: https://reviews.apache.org/r/72753 --- .../mesos/isolators/volume/csi/isolator.cpp| 142 + .../mesos/isolators/volume/csi/isolator.hpp| 2 + 2 files changed, 144 insertions(+) diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index d61fe30..535974b 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include @@ -35,6 +36,7 @@ #include "slave/containerizer/mesos/isolators/volume/csi/isolator.hpp" #include "slave/containerizer/mesos/isolators/volume/csi/paths.hpp" +using std::list; using std::string; using std::vector; @@ -104,6 +106,146 @@ Future VolumeCSIIsolatorProcess::recover( const vector& states, const hashset& orphans) { + foreach (const ContainerState& state, states) { +const ContainerID& containerId = state.container_id(); + +Try recover = recoverContainer(containerId); +if (recover.isError()) { + return Failure( + "Failed to recover CSI volumes for container " + + stringify(containerId) + ": " + recover.error()); +} + } + + // Recover any orphan containers that we might have check pointed. + // These orphan containers will be destroyed by the containerizer + // through the regular cleanup path. See MESOS-2367 for details. 
+ foreach (const ContainerID& containerId, orphans) { +Try recover = recoverContainer(containerId); +if (recover.isError()) { + return Failure( + "Failed to recover CSI volumes for orphan container " + + stringify(containerId) + ": " + recover.error()); +} + } + + // Walk through all the checkpointed containers to determine if + // there are any unknown orphan containers. + Try> entries = os::ls(rootDir); + if (entries.isError()) { +return Failure( +"Unable to list CSI volume checkpoint directory '" + +rootDir + "': " + entries.error()); + } + + foreach (const string& entry, entries.get()) { +ContainerID containerId = + protobuf::parseContainerId(Path(entry).basename()); + +// Check if this container has already been recovered. +if (infos.contains(containerId)) { + continue; +} + +// An unknown orphan container. Recover it and then clean it up. +Try recover = recoverContainer(containerId); +if (recover.isError()) { + return Failure( + "Failed to recover CSI volumes for orphan container " + + stringify(containerId) + ": " + recover.error()); +} + +LOG(INFO) << "Cleaning up CSI volumes for unknown orphaned " + << "container " << containerId; + +cleanup(containerId); + } + + return Nothing(); +} + + +Try VolumeCSIIsolatorProcess::recoverContainer( +const ContainerID& containerId) +{ + const string containerDir = csi::paths::getContainerDir(rootDir, containerId); + if (!os::exists(containerDir)) { +// This may occur in the following cases: +// 1. The container has exited and the isolator has removed the +// container directory in '_cleanup()' but agent dies before +// noticing this. +// 2. Agent dies before the isolator checkpoints CSI volumes for +// the container in 'prepare()'. +// For the above cases, we do not need to do anything since there +// is nothing to clean up for this container after agent restarts. 
+return Nothing(); + } + + const string volumesPath = csi::paths::getVolumesPath(rootDir, containerId); + if (!os::exists(volumesPath)) { +// This may occur if agent dies after creating the container directory +// but before it checkpoints anything in it. +LOG(WARNING) << "The CSI volumes checkpoint file expected at '" + << volumesPath << "' for container " << containerId + << " does not exist"; + +// Construct an info object with empty CSI volumes since no CSI volumes +// are mounted yet for this container, and this container will be cleaned +// up by containerizer (as known orphan container) or by `recover` (as +// unknown orphan c
[mesos] 01/05: Implemented the framework and `create` method of `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit a3e8fd89b1a34cf479c454e9991712cd2999affe Author: Qian Zhang AuthorDate: Thu Jul 16 22:13:18 2020 +0800 Implemented the framework and `create` method of `volume/csi` isolator. Review: https://reviews.apache.org/r/72690 --- src/CMakeLists.txt | 2 + src/Makefile.am| 4 + .../mesos/isolators/volume/csi/isolator.cpp| 114 + .../mesos/isolators/volume/csi/isolator.hpp| 86 .../mesos/isolators/volume/csi/paths.cpp | 47 + .../mesos/isolators/volume/csi/paths.hpp | 57 +++ 6 files changed, 310 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c60d98a..f3abdbf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -196,6 +196,7 @@ if (NOT WIN32) slave/containerizer/mesos/isolators/posix/disk.cpp slave/containerizer/mesos/isolators/posix/rlimits.cpp slave/containerizer/mesos/isolators/volume/sandbox_path.cpp +slave/containerizer/mesos/isolators/volume/csi/paths.cpp slave/containerizer/mesos/provisioner/appc/cache.cpp slave/containerizer/mesos/provisioner/appc/fetcher.cpp slave/containerizer/mesos/provisioner/appc/paths.cpp @@ -336,6 +337,7 @@ set(LINUX_SRC slave/containerizer/mesos/isolators/volume/image.cpp slave/containerizer/mesos/isolators/volume/secret.cpp slave/containerizer/mesos/isolators/volume/utils.cpp + slave/containerizer/mesos/isolators/volume/csi/isolator.cpp slave/containerizer/mesos/provisioner/backends/aufs.cpp slave/containerizer/mesos/provisioner/backends/bind.cpp slave/containerizer/mesos/provisioner/backends/overlay.cpp) diff --git a/src/Makefile.am b/src/Makefile.am index 49dab4b..70e844d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1251,6 +1251,8 @@ libmesos_no_3rdparty_la_SOURCES += \ slave/containerizer/mesos/isolators/posix/rlimits.hpp \ slave/containerizer/mesos/isolators/volume/sandbox_path.cpp \ 
slave/containerizer/mesos/isolators/volume/sandbox_path.hpp \ + slave/containerizer/mesos/isolators/volume/csi/paths.cpp \ + slave/containerizer/mesos/isolators/volume/csi/paths.hpp \ slave/containerizer/mesos/isolators/windows/cpu.hpp \ slave/containerizer/mesos/isolators/windows/mem.hpp \ slave/containerizer/mesos/launch.cpp \ @@ -1454,6 +1456,8 @@ MESOS_LINUX_FILES = \ slave/containerizer/mesos/isolators/volume/secret.hpp \ slave/containerizer/mesos/isolators/volume/utils.cpp \ slave/containerizer/mesos/isolators/volume/utils.hpp \ + slave/containerizer/mesos/isolators/volume/csi/isolator.cpp \ + slave/containerizer/mesos/isolators/volume/csi/isolator.hpp \ slave/containerizer/mesos/provisioner/backends/aufs.cpp \ slave/containerizer/mesos/provisioner/backends/aufs.hpp \ slave/containerizer/mesos/provisioner/backends/bind.cpp \ diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp new file mode 100644 index 000..7ec3a4e --- /dev/null +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include +#include + +#include + +#include + +#include "slave/containerizer/mesos/isolators/volume/csi/isolator.hpp" +#include "slave/containerizer/mesos/isolators/volume/csi/paths.hpp" + +using std::string; +using std::vector; + +using process::Futur
[mesos] 03/05: Implemented the `cleanup` method of `volume/csi` isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 17db3a90c644b5044e2b101cb431e37c2d23fed5 Author: Qian Zhang AuthorDate: Wed Aug 5 17:14:46 2020 +0800 Implemented the `cleanup` method of `volume/csi` isolator. Review: https://reviews.apache.org/r/72734 --- .../mesos/isolators/volume/csi/isolator.cpp| 78 ++ .../mesos/isolators/volume/csi/isolator.hpp| 4 ++ 2 files changed, 82 insertions(+) diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp index 90a526f..d61fe30 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.cpp @@ -318,6 +318,84 @@ Future> VolumeCSIIsolatorProcess::_prepare( Future VolumeCSIIsolatorProcess::cleanup( const ContainerID& containerId) { + if (!infos.contains(containerId)) { +VLOG(1) << "Ignoring cleanup request for unknown container " << containerId; +return Nothing(); + } + + hashmap references; + foreachvalue (const Owned& info, infos) { +foreach (const CSIVolume& volume, info->volumes) { + if (!references.contains(volume)) { +references[volume] = 1; + } else { +references[volume]++; + } +} + } + + vector> futures; + + foreach (const CSIVolume& volume, infos[containerId]->volumes) { +if (references.contains(volume) && references[volume] > 1) { + VLOG(1) << "Cannot unpublish the volume with plugin '" + << volume.plugin_name() << "' and ID '" << volume.id() + << "' for container " << containerId + << " since its reference count is " << references[volume]; + continue; +} + +LOG(INFO) << "Unpublishing the volume with plugin '" + << volume.plugin_name() << "' and ID '" << volume.id() + << "' for container " << containerId; + +// Invoke CSI server to unpublish the volumes. 
+futures.push_back( +csiServer->unpublishVolume(volume.plugin_name(), volume.id())); + } + + // Erase the `Info` struct of this container before unpublishing the volumes. + // This is to ensure the reference count of the volume will not be wrongly + // increased if unpublishing volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unpublish the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + + return await(futures) +.then(defer( +PID(this), +&VolumeCSIIsolatorProcess::_cleanup, +containerId, +lambda::_1)); +} + + +Future VolumeCSIIsolatorProcess::_cleanup( +const ContainerID& containerId, +const vector>& futures) +{ + vector messages; + foreach (const Future& future, futures) { +if (!future.isReady()) { + messages.push_back(future.isFailed() ? future.failure() : "discarded"); +} + } + + if (!messages.empty()) { +return Failure(strings::join("\n", messages)); + } + + const string containerDir = csi::paths::getContainerDir(rootDir, containerId); + Try rmdir = os::rmdir(containerDir); + if (rmdir.isError()) { +return Failure( +"Failed to remove the container directory at '" + +containerDir + "': " + rmdir.error()); + } + + LOG(INFO) << "Removed the container directory at '" << containerDir +<< "' for container " << containerId; + return Nothing(); } diff --git a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp index a70da4f..e05a7b8 100644 --- a/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp +++ b/src/slave/containerizer/mesos/isolators/volume/csi/isolator.hpp @@ -94,6 +94,10 @@ private: const std::vector& mounts, const std::vector>& futures); + process::Future _cleanup( + const ContainerID& containerId, + const std::vector>& futures); + const Flags flags; CSIServer* csiServer;
[mesos] 05/05: Enabled the `volume/csi` isolator in `MesosContainerizer`.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 1fb79fefa4c28e0cf0be2686ee3d808dc30d2bfc Author: Qian Zhang AuthorDate: Mon Aug 10 09:39:54 2020 +0800 Enabled the `volume/csi` isolator in `MesosContainerizer`. Review: https://reviews.apache.org/r/72754 --- src/slave/containerizer/containerizer.cpp | 7 +-- src/slave/containerizer/containerizer.hpp | 4 +++- src/slave/containerizer/mesos/containerizer.cpp | 10 +- src/slave/containerizer/mesos/containerizer.hpp | 4 +++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/containerizer.cpp b/src/slave/containerizer/containerizer.cpp index 9e44e5e..ba3ab43 100644 --- a/src/slave/containerizer/containerizer.cpp +++ b/src/slave/containerizer/containerizer.cpp @@ -33,6 +33,7 @@ #include "hook/manager.hpp" +#include "slave/csi_server.hpp" #include "slave/flags.hpp" #include "slave/gc.hpp" #include "slave/slave.hpp" @@ -220,7 +221,8 @@ Try Containerizer::create( GarbageCollector* gc, SecretResolver* secretResolver, VolumeGidManager* volumeGidManager, -PendingFutureTracker* futureTracker) +PendingFutureTracker* futureTracker, +CSIServer* csiServer) { // Get the set of containerizer types. 
const vector _types = strings::split(flags.containerizers, ","); @@ -297,7 +299,8 @@ Try Containerizer::create( secretResolver, nvidia, volumeGidManager, - futureTracker); + futureTracker, + csiServer); if (containerizer.isError()) { return Error("Could not create MesosContainerizer: " + diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp index 2b3c4c0..691fdfe 100644 --- a/src/slave/containerizer/containerizer.hpp +++ b/src/slave/containerizer/containerizer.hpp @@ -38,6 +38,7 @@ #include "common/future_tracker.hpp" +#include "slave/csi_server.hpp" #include "slave/gc.hpp" #include "slave/volume_gid_manager/volume_gid_manager.hpp" @@ -78,7 +79,8 @@ public: GarbageCollector* gc, SecretResolver* secretResolver = nullptr, VolumeGidManager* volumeGidManager = nullptr, - PendingFutureTracker* futureTracker = nullptr); + PendingFutureTracker* futureTracker = nullptr, + CSIServer* csiServer = nullptr); // Determine slave resources from flags, probing the system or // querying a delegate. 
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index 3c1840c..31d45a0 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -61,6 +61,7 @@ #include "module/manager.hpp" +#include "slave/csi_server.hpp" #include "slave/gc.hpp" #include "slave/paths.hpp" #include "slave/slave.hpp" @@ -114,6 +115,7 @@ #include "slave/containerizer/mesos/isolators/volume/host_path.hpp" #include "slave/containerizer/mesos/isolators/volume/image.hpp" #include "slave/containerizer/mesos/isolators/volume/secret.hpp" +#include "slave/containerizer/mesos/isolators/volume/csi/isolator.hpp" #endif // __linux__ #if ENABLE_SECCOMP_ISOLATOR @@ -180,7 +182,8 @@ Try MesosContainerizer::create( SecretResolver* secretResolver, const Option& nvidia, VolumeGidManager* volumeGidManager, -PendingFutureTracker* futureTracker) +PendingFutureTracker* futureTracker, +CSIServer* csiServer) { Try> isolations = [&flags]() -> Try> { const vector tokens(strings::tokenize(flags.isolation, ",")); @@ -467,6 +470,11 @@ Try MesosContainerizer::create( [secretResolver] (const Flags& flags) -> Try { return VolumeSecretIsolatorProcess::create(flags, secretResolver); }}, + +{"volume/csi", + [csiServer] (const Flags& flags) -> Try { +return VolumeCSIIsolatorProcess::create(flags, csiServer); + }}, #endif // __linux__ // Disk isolators. 
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp index 56e4c49..62174df 100644 --- a/src/slave/containerizer/mesos/containerizer.hpp +++ b/src/slave/containerizer/mesos/containerizer.hpp @@ -36,6 +36,7 @@ #include #include +#include "slave/csi_server.hpp" #include "slave/gc.hpp" #include "slave/state.hpp" @@ -75,7 +76,8 @@ public: SecretResolver* secretResolver = nullptr, const Option& nvidia = None(), VolumeGidManager* volumeGidManager = nullptr, - PendingFutureTracker* futureTracker = nullptr); + PendingFutureTracker* futureTracker = nullptr, + CSIServer* csiServer = nullptr); static Try create( const Flags& flags,
[mesos] branch master updated: Updated volume manager to support user specified target path root.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 014431e Updated volume manager to support user specified target path root. 014431e is described below commit 014431e3c1b98e514e327318b52e5c54cc6174df Author: Qian Zhang AuthorDate: Mon Aug 17 19:22:48 2020 +0800 Updated volume manager to support user specified target path root. Review: https://reviews.apache.org/r/72781 --- src/csi/v0_volume_manager.cpp | 29 +++-- src/csi/v0_volume_manager_process.hpp | 1 + src/csi/v1_volume_manager.cpp | 31 --- src/csi/v1_volume_manager_process.hpp | 1 + src/slave/csi_server.cpp | 14 ++ 5 files changed, 31 insertions(+), 45 deletions(-) diff --git a/src/csi/v0_volume_manager.cpp b/src/csi/v0_volume_manager.cpp index 9e840a7..42a23ba 100644 --- a/src/csi/v0_volume_manager.cpp +++ b/src/csi/v0_volume_manager.cpp @@ -92,7 +92,10 @@ VolumeManagerProcess::VolumeManagerProcess( runtime(_runtime), serviceManager(_serviceManager), metrics(_metrics), -secretResolver(_secretResolver) +secretResolver(_secretResolver), +mountRootDir(info.has_target_path_root() + ? info.target_path_root() + : paths::getMountRootDir(rootDir, info.type(), info.name())) { // This should have been validated in `VolumeManager::create`. CHECK(!services.empty()) @@ -210,9 +213,6 @@ Future VolumeManagerProcess::recover() } // Garbage collect leftover mount paths that were failed to remove before. - const string mountRootDir = -paths::getMountRootDir(rootDir, info.type(), info.name()); - Try> mountPaths = paths::getMountPaths(mountRootDir); if (mountPaths.isError()) { // TODO(chhsiao): This could indicate that something is seriously wrong. 
@@ -723,8 +723,7 @@ Future VolumeManagerProcess::_deleteVolume(const std::string& volumeId) if (volumeState.node_publish_required()) { CHECK_EQ(VolumeState::PUBLISHED, volumeState.state()); -const string targetPath = paths::getMountTargetPath( -paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); +const string targetPath = paths::getMountTargetPath(mountRootDir, volumeId); // NOTE: Normally the volume should have been cleaned up. However this may // not be true for preprovisioned volumes (e.g., leftover from a previous @@ -929,8 +928,7 @@ Future VolumeManagerProcess::_publishVolume(const string& volumeId) .then(process::defer(self(), &Self::_publishVolume, volumeId)); } - const string targetPath = paths::getMountTargetPath( - paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); + const string targetPath = paths::getMountTargetPath(mountRootDir, volumeId); // NOTE: The target path will be cleaned up during volume removal. Try mkdir = os::mkdir(targetPath); @@ -959,7 +957,7 @@ Future VolumeManagerProcess::_publishVolume(const string& volumeId) if (nodeCapabilities->stageUnstageVolume) { const string stagingPath = paths::getMountStagingPath( -paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); +mountRootDir, volumeId); CHECK(os::exists(stagingPath)); request.set_staging_target_path(stagingPath); @@ -1044,8 +1042,7 @@ Future VolumeManagerProcess::__publishVolume(const string& volumeId) .then(process::defer(self(), &Self::__publishVolume, volumeId)); } - const string stagingPath = paths::getMountStagingPath( - paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); + const string stagingPath = paths::getMountStagingPath(mountRootDir, volumeId); // NOTE: The staging path will be cleaned up in during volume removal. 
Try mkdir = os::mkdir(stagingPath); @@ -1151,8 +1148,7 @@ Future VolumeManagerProcess::_unpublishVolume(const string& volumeId) checkpointVolumeState(volumeId); } - const string stagingPath = paths::getMountStagingPath( - paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); + const string stagingPath = paths::getMountStagingPath(mountRootDir, volumeId); CHECK(os::exists(stagingPath)); @@ -1210,8 +1206,7 @@ Future VolumeManagerProcess::__unpublishVolume(const string& volumeId) checkpointVolumeState(volumeId); } - const string targetPath = paths::getMountTargetPath( - paths::getMountRootDir(rootDir, info.type(), info.name()), volumeId); + const string targetPath = paths::getMountTargetPath(mountRootDir, volumeId); CHECK(os::exists(targetPath)); @@ -1255,9 +1250,7 @@ void VolumeManagerProcess::garbageCollectMountPath(const string& volumeId) { CHECK(!volumes.contains(volumeId)); - const strin
[mesos] branch master updated: Updated the help message of the agent flag `--csi_plugin_config_dir`.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 831f172 Updated the help message of the agent flag `--csi_plugin_config_dir`. 831f172 is described below commit 831f172de7908ad8e40d14905cacb3a9c053e832 Author: Qian Zhang AuthorDate: Thu Aug 13 16:37:48 2020 +0800 Updated the help message of the agent flag `--csi_plugin_config_dir`. This is to make the help message of the agent flag `--csi_plugin_config_dir` aligned with the latest protobuf message `CSIPluginInfo`. Review: https://reviews.apache.org/r/72770 --- docs/configuration/agent.md | 7 ++- src/slave/flags.cpp | 7 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/configuration/agent.md b/docs/configuration/agent.md index e8608d9..4899202 100644 --- a/docs/configuration/agent.md +++ b/docs/configuration/agent.md @@ -1548,7 +1548,12 @@ Example config files in this directory: } { "type": "org.apache.mesos.csi.unmanaged-plugin", - "node_service_endpoint": "/var/lib/unmanaged-plugin/csi.sock", + "endpoints": [ +{ + "csi_service": "NODE_SERVICE", + "endpoint": "/var/lib/unmanaged-plugin/csi.sock" +} + ], "target_path_root": "/mnt/unmanaged-plugin" } diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp index 02a5568..878788c 100644 --- a/src/slave/flags.cpp +++ b/src/slave/flags.cpp @@ -150,7 +150,12 @@ mesos::internal::slave::Flags::Flags() "\n" "{\n" " \"type\": \"org.apache.mesos.csi.unmanaged-plugin\",\n" - " \"node_service_endpoint\": \"/var/lib/unmanaged-plugin/csi.sock\",\n" + " \"endpoints\": [\n" + "{\n" + " \"csi_service\": \"NODE_SERVICE\",\n" + " \"endpoint\": \"/var/lib/unmanaged-plugin/csi.sock\"\n" + "}\n" + " ],\n" " \"target_path_root\": \"/mnt/unmanaged-plugin\"\n" "}");
[mesos] 03/04: Introduced a new agent flag `--csi_plugin_config_dir`.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 4d3c3d770eceff12ea5d39dbd57ce6bae602266e Author: Qian Zhang AuthorDate: Mon Jul 13 17:16:28 2020 +0800 Introduced a new agent flag `--csi_plugin_config_dir`. Review: https://reviews.apache.org/r/72672 --- docs/configuration/agent.md | 43 +++ src/slave/flags.cpp | 39 +++ src/slave/flags.hpp | 1 + 3 files changed, 83 insertions(+) diff --git a/docs/configuration/agent.md b/docs/configuration/agent.md index 01ffa38..e8608d9 100644 --- a/docs/configuration/agent.md +++ b/docs/configuration/agent.md @@ -1511,6 +1511,49 @@ Example config file in this directory: + + +--csi_plugin_config_dir=VALUE + + +Path to a directory that contains CSI plugin configs. +Each file in the config dir should contain a JSON object representing +a CSIPluginInfo object which can be either a managed CSI +plugin (i.e. the plugin launched by Mesos as a standalone container) +or an unmanaged CSI plugin (i.e. the plugin launched out of Mesos). 
+ +Example config files in this directory: +{ + "type": "org.apache.mesos.csi.managed-plugin", + "containers": [ +{ + "services": [ +"CONTROLLER_SERVICE", +"NODE_SERVICE" + ], + "command": { +"shell": false, +"value": "managed-plugin", +"arguments": [ + "managed-plugin", + "--endpoint=$(CSI_ENDPOINT)" +] + }, + "resources": [ +{"name": "cpus", "type": "SCALAR", "scalar": {"value": 0.1}}, +{"name": "mem", "type": "SCALAR", "scalar": {"value": 1024}} + ] +} + ] +} +{ + "type": "org.apache.mesos.csi.unmanaged-plugin", + "node_service_endpoint": "/var/lib/unmanaged-plugin/csi.sock", + "target_path_root": "/mnt/unmanaged-plugin" +} + + + --[no-]revocable_cpu_low_priority diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp index 2f88b90..02a5568 100644 --- a/src/slave/flags.cpp +++ b/src/slave/flags.cpp @@ -115,6 +115,45 @@ mesos::internal::slave::Flags::Flags() " \"name\": \"lvm\"\n" "}"); + add(&Flags::csi_plugin_config_dir, + "csi_plugin_config_dir", + "Path to a directory that contains CSI plugin configs.\n" + "Each file in the config dir should contain a JSON object representing\n" + "a `CSIPluginInfo` object which can be either a managed CSI plugin\n" + "(i.e. the plugin launched by Mesos as a standalone container) or an\n" + "unmanaged CSI plugin (i.e. 
the plugin launched out of Mesos).\n" + "\n" + "Example config files in this directory:\n" + "{\n" + " \"type\": \"org.apache.mesos.csi.managed-plugin\",\n" + " \"containers\": [\n" + "{\n" + " \"services\": [\n" + "\"CONTROLLER_SERVICE\",\n" + "\"NODE_SERVICE\"\n" + " ],\n" + " \"command\": {\n" + "\"shell\": false,\n" + "\"value\": \"managed-plugin\",\n" + "\"arguments\": [\n" + " \"managed-plugin\",\n" + " \"--endpoint=$(CSI_ENDPOINT)\"\n" + "]\n" + " },\n" + " \"resources\": [\n" + "{\"name\": \"cpus\", \"type\": \"SCALAR\", \"scalar\": {\"value\": 0.1}},\n" // NOLINT(whitespace/line_length) + "{\"name\": \"mem\", \"type\": \"SCALAR\", \"scalar\": {\"value\": 1024}}\n" // NOLINT(whitespace/line_length) + " ]\n" + "}\n" + " ]\n" + "}\n" + "\n" + "{\n" + " \"type\": \"org.apache.mesos.csi.unmanaged-plugin\",\n" + " \"node_service_endpoint\": \"/var/lib/unmanaged-plugin/csi.sock\",\n" + " \"target_path_root\": \"/mnt/unmanaged-plugin\"\n" + "}"); + add(&Flags::disk_profile_adaptor, "disk_profile_adaptor", "The name of the disk profile adaptor module that storage resource\n" diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp index c3ff887..51770f5 100644 --- a/src/slave/flags.hpp +++ b/src/slave/flags.hpp @@ -46,6 +46,7 @@ public: bool hostname_lookup; Option resources; Option resource_provider_config_dir; + Option csi_plugin_config_dir; Option disk_profile_adaptor; std::string isolation; std::string launcher;
[mesos] branch master updated (da08b0c -> d2c84d1)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from da08b0c Fixed a compilation issue on Windows with os::spawn. new 8a41f00 Added CSI volume type into the `Volume` protobuf message. new e5514e9 Updated `CSIPluginInfo` for supporting 3rd party CSI plugins. new 4d3c3d7 Introduced a new agent flag `--csi_plugin_config_dir`. new d2c84d1 Improved CSI service manager to support unmanaged CSI plugins. The 4 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/configuration/agent.md| 43 +++ include/mesos/csi/types.hpp| 40 --- include/mesos/csi/types.proto | 90 -- include/mesos/mesos.proto | 129 - .../storage/disk_profile_adaptor.hpp | 4 +- include/mesos/type_utils.hpp | 6 + include/mesos/v1/mesos.proto | 129 - src/CMakeLists.txt | 2 - src/Makefile.am| 10 -- src/common/type_utils.cpp | 18 +++ src/csi/service_manager.cpp| 92 ++- src/csi/service_manager.hpp| 12 +- src/csi/state.proto| 4 +- src/csi/types.cpp | 36 -- src/csi/v0_utils.cpp | 68 +-- src/csi/v0_utils.hpp | 14 ++- src/csi/v0_volume_manager.cpp | 12 +- src/csi/v0_volume_manager.hpp | 8 +- src/csi/v0_volume_manager_process.hpp | 8 +- src/csi/v1_utils.cpp | 68 +-- src/csi/v1_utils.hpp | 14 ++- src/csi/v1_volume_manager.cpp | 12 +- src/csi/v1_volume_manager.hpp | 8 +- src/csi/v1_volume_manager_process.hpp | 8 +- src/csi/volume_manager.hpp | 8 +- src/examples/test_csi_plugin.cpp | 6 +- src/resource_provider/state.proto | 4 +- src/slave/flags.cpp| 39 +++ src/slave/flags.hpp| 1 + src/tests/csi_utils_tests.cpp | 1 - src/tests/disk_profile_adaptor_tests.cpp | 6 +- 31 files changed, 576 insertions(+), 324 deletions(-) delete mode 100644 include/mesos/csi/types.hpp delete mode 100644 
include/mesos/csi/types.proto delete mode 100644 src/csi/types.cpp
[mesos] 04/04: Improved CSI service manager to support unmanaged CSI plugins.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit d2c84d14b63f137546c00d69b0309c4543811732 Author: Qian Zhang AuthorDate: Wed Jul 15 16:02:48 2020 +0800 Improved CSI service manager to support unmanaged CSI plugins. Review: https://reviews.apache.org/r/72683 --- src/csi/service_manager.cpp | 92 - src/csi/service_manager.hpp | 12 +- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/src/csi/service_manager.cpp b/src/csi/service_manager.cpp index a87df96..7a8d8e5 100644 --- a/src/csi/service_manager.cpp +++ b/src/csi/service_manager.cpp @@ -137,6 +137,12 @@ public: const Runtime& _runtime, Metrics* _metrics); + ServiceManagerProcess( + const CSIPluginInfo& _info, + const hashset& services, + const Runtime& _runtime, + Metrics* _metrics); + Future recover(); Future getServiceEndpoint(const Service& service); @@ -180,8 +186,15 @@ private: http::Headers headers; Option apiVersion; + + // This is for the managed CSI plugin which will be launched as + // standalone containers. hashmap serviceContainers; + // This is for the unmanaged CSI plugin which is already deployed + // out of Mesos. 
+ hashmap serviceEndpoints; + hashmap> daemons; hashmap>> endpoints; }; @@ -233,8 +246,45 @@ ServiceManagerProcess::ServiceManagerProcess( } +ServiceManagerProcess::ServiceManagerProcess( +const CSIPluginInfo& _info, +const hashset& services, +const Runtime& _runtime, +Metrics* _metrics) + : ProcessBase(process::ID::generate("csi-service-manager")), +agentUrl(), +rootDir(), +info(_info), +containerPrefix(), +authToken(), +contentType(ContentType::PROTOBUF), +runtime(_runtime), +metrics(_metrics) +{ + foreach (const Service& service, services) { +foreach (const CSIPluginEndpoint& serviceEndpoint, info.endpoints()) { + if (serviceEndpoint.csi_service() == service) { +serviceEndpoints[service] = serviceEndpoint.endpoint(); +break; + } +} + +CHECK(serviceEndpoints.contains(service)) + << service << " not found for CSI plugin type '" << info.type() + << "' and name '" << info.name() << "'"; + } +} + + Future ServiceManagerProcess::recover() { + // For the unmanaged CSI plugin, we do not need to recover anything. + if (!serviceEndpoints.empty()) { +return Nothing(); + } + + CHECK(!serviceContainers.empty()); + return getContainers() .then(process::defer(self(), [=]( const hashmap>& containers) @@ -346,6 +396,21 @@ Future ServiceManagerProcess::recover() Future ServiceManagerProcess::getServiceEndpoint(const Service& service) { + // For the unmanaged CSI plugin, get its endpoint from + // `serviceEndpoints` directly. + if (!serviceEndpoints.empty()) { +if (serviceEndpoints.contains(service)) { + return serviceEndpoints.at(service); +} else { + return Failure( + stringify(service) + " not found for CSI plugin type '" + + info.type() + "' and name '" + info.name() + "'"); +} + } + + // For the managed CSI plugin, get its endpoint via its corresponding + // standalone container ID. 
+ CHECK(!serviceContainers.empty()); if (!serviceContainers.contains(service)) { return Failure( stringify(service) + " not found for CSI plugin type '" + info.type() + @@ -362,8 +427,15 @@ Future ServiceManagerProcess::getApiVersion() return apiVersion.get(); } - // Ensure that the plugin has been probed (which does the API version - // detection) through `getEndpoint` before returning the API version. + // Ensure that the unmanaged CSI plugin has been probed (which does the API + // version detection) before returning the API version. + if (!serviceEndpoints.empty()) { +return probeEndpoint(serviceEndpoints.begin()->second) + .then(process::defer(self(), [=] { return CHECK_NOTNONE(apiVersion); })); + } + + // For the managed CSI plugin, `probeEndpoint` will be internally called by + // `getEndpoint` to do the API version detection. CHECK(!serviceContainers.empty()); return getEndpoint(serviceContainers.begin()->second) .then(process::defer(self(), [=] { return CHECK_NOTNONE(apiVersion); })); @@ -790,6 +862,22 @@ ServiceManager::ServiceManager( } +ServiceManager::ServiceManager( +const CSIPluginInfo& info, +const hashset& services, +const process::grpc::client::Runtime& runtime, +Metrics* metrics) + : process(new ServiceManagerProcess( +info, +services, +runtime, +
[mesos] 01/04: Added CSI volume type into the `Volume` protobuf message.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 8a41f00a379431d65b144e120fe60da95e6195e9 Author: Qian Zhang AuthorDate: Wed Jul 8 11:08:08 2020 +0800 Added CSI volume type into the `Volume` protobuf message. Also removed the `types.proto` file which has `VolumeCapability` defined as well so that we will have a single place (`mesos.proto`) to define this protobuf message. Review: https://reviews.apache.org/r/72660 --- include/mesos/csi/types.hpp| 40 -- include/mesos/csi/types.proto | 90 - include/mesos/mesos.proto | 91 ++ .../storage/disk_profile_adaptor.hpp | 4 +- include/mesos/type_utils.hpp | 6 ++ include/mesos/v1/mesos.proto | 91 ++ src/CMakeLists.txt | 2 - src/Makefile.am| 10 --- src/common/type_utils.cpp | 18 + src/csi/state.proto| 4 +- src/csi/types.cpp | 36 - src/csi/v0_utils.cpp | 68 src/csi/v0_utils.hpp | 14 ++-- src/csi/v0_volume_manager.cpp | 12 +-- src/csi/v0_volume_manager.hpp | 8 +- src/csi/v0_volume_manager_process.hpp | 8 +- src/csi/v1_utils.cpp | 68 src/csi/v1_utils.hpp | 14 ++-- src/csi/v1_volume_manager.cpp | 12 +-- src/csi/v1_volume_manager.hpp | 8 +- src/csi/v1_volume_manager_process.hpp | 8 +- src/csi/volume_manager.hpp | 8 +- src/examples/test_csi_plugin.cpp | 6 +- src/resource_provider/state.proto | 4 +- src/tests/csi_utils_tests.cpp | 1 - src/tests/disk_profile_adaptor_tests.cpp | 6 +- 26 files changed, 324 insertions(+), 313 deletions(-) diff --git a/include/mesos/csi/types.hpp b/include/mesos/csi/types.hpp deleted file mode 100644 index df9df38..000 --- a/include/mesos/csi/types.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef __MESOS_CSI_TYPES_HPP__ -#define __MESOS_CSI_TYPES_HPP__ - -// ONLY USEFUL AFTER RUNNING PROTOC. -#include - -namespace mesos { -namespace csi { -namespace types { - -bool operator==(const VolumeCapability& left, const VolumeCapability& right); - - -inline bool operator!=( -const VolumeCapability& left, const VolumeCapability& right) -{ - return !(left == right); -} - -} // namespace types { -} // namespace csi { -} // namespace mesos { - -#endif // __MESOS_CSI_TYPES_HPP__ diff --git a/include/mesos/csi/types.proto b/include/mesos/csi/types.proto deleted file mode 100644 index 3e1ac4b..000 --- a/include/mesos/csi/types.proto +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package mesos.csi.types; - -// This file contains "unversioned" CSI protobuf def
[mesos] 02/04: Updated `CSIPluginInfo` for supporting 3rd party CSI plugins.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit e5514e903478b688c6bcde7ca2551ba424055d10 Author: Qian Zhang AuthorDate: Wed Jul 8 11:22:09 2020 +0800 Updated `CSIPluginInfo` for supporting 3rd party CSI plugins. Review: https://reviews.apache.org/r/72661 --- include/mesos/mesos.proto| 38 ++ include/mesos/v1/mesos.proto | 38 ++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index fe371ea..0f91d88 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -1063,7 +1063,7 @@ message SlaveInfo { /** - * Describes the container configuration to run a CSI plugin component. + * Describes the container configuration to run a managed CSI plugin. */ message CSIPluginContainerInfo { enum Service { @@ -1080,6 +1080,15 @@ message CSIPluginContainerInfo { /** + * Describes the endpoint of an unmanaged CSI plugin service. + */ +message CSIPluginEndpoint { + required CSIPluginContainerInfo.Service csi_service = 1; + required string endpoint = 2; +} + + +/** * Describes a CSI plugin. */ message CSIPluginInfo { @@ -1103,13 +1112,34 @@ message CSIPluginInfo { // that the concatenation of type and name is unique in the cluster, and it // remains the same if the instance is migrated to another agent (e.g., there // is a change in the agent ID). - required string name = 2; - - // A list of container configurations to run CSI plugin components. + optional string name = 2 [default = "default"]; + + // We support two kinds of CSI plugins: + // 1. Managed CSI plugins: This is the plugin which will be launched by + // Mesos as standalone container, and Mesos will internally determine + // its endpoint when launching it and manage its whole lifecyle. For this + // kind of plugins, the `containers` field below must be specified. + // 2. 
Unmanaged CSI plugins: This is the plugin which is launched out of + // Mesos (e.g., manually launched by the operator). For this kind of + // plugins, the `endpoints` field below must be specified because Mesos + // needs it to call CSI gRPC methods. + // Please note that only one of the `containers` and `endpoints` fields should + // be specified. + + // A list of container configurations to run managed CSI plugin. // The controller service will be served by the first configuration // that contains `CONTROLLER_SERVICE`, and the node service will be // served by the first configuration that contains `NODE_SERVICE`. repeated CSIPluginContainerInfo containers = 3; + + // The service endpoints of the unmanaged CSI plugin. An endpoint is usually + // a path to a Unix domain socket. + repeated CSIPluginEndpoint endpoints = 4; + + // The root directory of all the target paths managed by the CSI plugin. + // Each volume will be published by the CSI plugin at a sub-directory + // under this path. + optional string target_path_root = 5; } diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index effcfa1..f25db8a 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -1051,7 +1051,7 @@ message AgentInfo { /** - * Describes the container configuration to run a CSI plugin component. + * Describes the container configuration to run a managed CSI plugin. */ message CSIPluginContainerInfo { enum Service { @@ -1068,6 +1068,15 @@ message CSIPluginContainerInfo { /** + * Describes the endpoint of an unmanaged CSI plugin service. + */ +message CSIPluginEndpoint { + required CSIPluginContainerInfo.Service csi_service = 1; + required string endpoint = 2; +} + + +/** * Describes a CSI plugin. 
*/ message CSIPluginInfo { @@ -1091,13 +1100,34 @@ message CSIPluginInfo { // that the concatenation of type and name is unique in the cluster, and it // remains the same if the instance is migrated to another agent (e.g., there // is a change in the agent ID). - required string name = 2; - - // A list of container configurations to run CSI plugin components. + optional string name = 2 [default = "default"]; + + // We support two kinds of CSI plugins: + // 1. Managed CSI plugins: This is the plugin which will be launched by + // Mesos as standalone container, and Mesos will internally determine + // its endpoint when launching it and manage its whole lifecyle. For this + // kind of plugins, the `containers` field below must be specified. + // 2. Unmanaged CSI plugins: This is the plugin which is launched out of + // Mesos (e.g., manually launched by the operator). For this kind of + // plugins, the `endpoints` field below must be specified because Mesos + // needs it to c
[mesos] 02/02: Added MESOS-10126 to the 1.4.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.4.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit e74b07ad485556800df889f03153cfc3687bdbbc Author: Qian Zhang AuthorDate: Fri May 29 17:19:09 2020 +0800 Added MESOS-10126 to the 1.4.4 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 0ce1715..93c5279 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ Release Notes - Mesos - Version 1.4.4 (WIP) ** Bug * [MESOS-9507] - Agent could not recover due to empty docker volume checkpointed files. * [MESOS-9695] - Remove the duplicate pid check in Docker containerizer + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement: * [MESOS-9159] - Support Foreign URLs in docker registry puller.
[mesos] branch master updated: Added MESOS-10126 to the 1.4.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new ccfbc91 Added MESOS-10126 to the 1.4.4 CHANGELOG. ccfbc91 is described below commit ccfbc915a841dd06b42103188b4786c6b0a6b090 Author: Qian Zhang AuthorDate: Fri May 29 17:19:09 2020 +0800 Added MESOS-10126 to the 1.4.4 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 2ad9cbf..6e94a3f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2579,6 +2579,7 @@ Release Notes - Mesos - Version 1.4.4 (WIP) ** Bug * [MESOS-9507] - Agent could not recover due to empty docker volume checkpointed files. * [MESOS-9695] - Remove the duplicate pid check in Docker containerizer + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement: * [MESOS-9159] - Support Foreign URLs in docker registry puller.
[mesos] branch 1.4.x updated (f05058d -> e74b07a)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.4.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from f05058d Added MESOS-9695 to the 1.4.4 CHANGELOG. new 8f0d3d0 Erased `Info` struct before unmounting volumes in Docker volume isolator. new e74b07a Added MESOS-10126 to the 1.4.4 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG| 1 + .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 8 insertions(+), 5 deletions(-)
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.4.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 8f0d3d0857b13924f379b97b9fe4b229f2d5d301 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, but actually the volume is not being used by any containers. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index 7be9396..5be7572 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -573,6 +573,13 @@ Future DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID(this), @@ -586,8 +593,6 @@ Future DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const list>& futures) { - CHECK(infos.contains(containerId)); - vector messages; foreach (const Future& future, futures) { if (!future.isReady()) { @@ -612,9 +617,6 @@ Future DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] branch master updated: Added MESOS-10126 to the 1.5.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new f403779 Added MESOS-10126 to the 1.5.4 CHANGELOG. f403779 is described below commit f4037797a3f3a1228ed835d121020ef737f56cae Author: Qian Zhang AuthorDate: Fri May 29 17:15:17 2020 +0800 Added MESOS-10126 to the 1.5.4 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index b7d2526..2ad9cbf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1948,6 +1948,7 @@ Release Notes - Mesos - Version 1.5.4 (WIP) * [MESOS-9856] - REVIVE call with specified role(s) clears filters for all roles of a framework. * [MESOS-9870] - Simultaneous adding/removal of a role from framework's roles and its suppressed roles crashes the master. * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9159] - Support Foreign URLs in docker registry puller.
[mesos] branch 1.5.x updated (d440095 -> 2b92788)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.5.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from d440095 Fixed compiler error. new 4c72e60 Erased `Info` struct before unmounting volumes in Docker volume isolator. new 2b92788 Added MESOS-10126 to the 1.5.4 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG| 1 + .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 8 insertions(+), 5 deletions(-)
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.5.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 4c72e6098fdf2b38e79954c03385bb0ecacbb489 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, but actually the volume is not being used by any containers. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index 619aecb..bc776b4 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -619,6 +619,13 @@ Future DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -632,8 +639,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const list<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -658,9 +663,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] 02/02: Added MESOS-10126 to the 1.5.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.5.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 2b92788e4d3517550121b1465d52fda0adf34393 Author: Qian Zhang AuthorDate: Fri May 29 17:15:17 2020 +0800 Added MESOS-10126 to the 1.5.4 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 28bc3ca..a5f3eff 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,6 +17,7 @@ Release Notes - Mesos - Version 1.5.4 (WIP) * [MESOS-9856] - REVIVE call with specified role(s) clears filters for all roles of a framework. * [MESOS-9870] - Simultaneous adding/removal of a role from framework's roles and its suppressed roles crashes the master. * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9159] - Support Foreign URLs in docker registry puller.
[mesos] branch master updated: Added MESOS-10126 to the 1.6.3 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new f503dc0 Added MESOS-10126 to the 1.6.3 CHANGELOG. f503dc0 is described below commit f503dc0ed05732f21819b3e08cb04da81398f26c Author: Qian Zhang AuthorDate: Fri May 29 17:11:05 2020 +0800 Added MESOS-10126 to the 1.6.3 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index cd42ead..b7d2526 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1470,6 +1470,7 @@ Release Notes - Mesos - Version 1.6.3 (WIP) * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave. * [MESOS-9893] - `volume/secret` isolator should cleanup the stored secret from runtime directory when the container is destroyed. * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-8880] - Add minimum capabilities in the master.
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.6.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit b0a57116c6794f5d0036ed9c3668f27f29155bd7 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, even though the volume is not actually being used by any container. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents, since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index 166e3f4..e73bad1 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -627,6 +627,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -640,8 +647,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const list<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -666,9 +671,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] branch 1.6.x updated (11cdf04 -> 79b2d80)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.6.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from 11cdf04 Fixed compiler error. new b0a5711 Erased `Info` struct before unmounting volumes in Docker volume isolator. new 79b2d80 Added MESOS-10126 to the 1.6.3 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG | 1 + .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 8 insertions(+), 5 deletions(-)
[mesos] 02/02: Added MESOS-10126 to the 1.6.3 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.6.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 79b2d8031525cd04973a6326c68b848152c67d7d Author: Qian Zhang AuthorDate: Fri May 29 17:11:05 2020 +0800 Added MESOS-10126 to the 1.6.3 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 2e35f64..d436bbf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,6 +24,7 @@ Release Notes - Mesos - Version 1.6.3 (WIP) * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave. * [MESOS-9893] - `volume/secret` isolator should cleanup the stored secret from runtime directory when the container is destroyed. * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-8880] - Add minimum capabilities in the master.
[mesos] branch 1.7.x updated (5f61704 -> 707becd)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from 5f61704 Prepared the 1.7.3 CHANGELOG for release. new 819b9d8 Erased `Info` struct before unmounting volumes in Docker volume isolator. new 707becd Added MESOS-10126 to the 1.7.4 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG | 8 .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 15 insertions(+), 5 deletions(-)
[mesos] branch master updated: Added MESOS-10126 to the 1.7.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 2abebac Added MESOS-10126 to the 1.7.4 CHANGELOG. 2abebac is described below commit 2abebac973141b370f314d5b17d59fda11c98cd9 Author: Qian Zhang AuthorDate: Fri May 29 16:58:27 2020 +0800 Added MESOS-10126 to the 1.7.4 CHANGELOG. --- CHANGELOG | 8 1 file changed, 8 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index ef5e02a..cd42ead 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -929,6 +929,14 @@ All Resolved Issues: * [MESOS-9036] - Document `linux/seccomp` isolator +Release Notes - Mesos - Version 1.7.4 (WIP) +--- +* This is a bug fix release. + +** Bug + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation + + Release Notes - Mesos - Version 1.7.3 --- * This is a bug fix release.
[mesos] 02/02: Added MESOS-10126 to the 1.7.4 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 707becdbc30e5347fc464235ec8775dca73421c3 Author: Qian Zhang AuthorDate: Fri May 29 16:58:27 2020 +0800 Added MESOS-10126 to the 1.7.4 CHANGELOG. --- CHANGELOG | 8 1 file changed, 8 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 64921a6..395305d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Release Notes - Mesos - Version 1.7.4 (WIP) +--- +* This is a bug fix release. + +** Bug + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation + + Release Notes - Mesos - Version 1.7.3 --- * This is a bug fix release.
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 819b9d8345e701321067f3b14ad2bb78b60d285c Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, even though the volume is not actually being used by any container. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents, since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index c924dde..d2d741a 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -646,6 +646,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -659,8 +666,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const vector<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -685,9 +690,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] branch master updated: Removed `WIP` for 1.7.3 since it has already been released.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 1ec9dec Removed `WIP` for 1.7.3 since it has already been released. 1ec9dec is described below commit 1ec9dec9671973ffc10a1e10f5b44d79c9bb0005 Author: Qian Zhang AuthorDate: Fri May 29 16:55:02 2020 +0800 Removed `WIP` for 1.7.3 since it has already been released. --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 854e53a..ef5e02a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -929,7 +929,7 @@ All Resolved Issues: * [MESOS-9036] - Document `linux/seccomp` isolator -Release Notes - Mesos - Version 1.7.3 (WIP) +Release Notes - Mesos - Version 1.7.3 --- * This is a bug fix release.
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.8.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit cdd3e2924596eecf605eeb73e9c57f23f6643936 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, even though the volume is not actually being used by any container. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents, since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index 40119d9..c0c689d 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -641,6 +641,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -654,8 +661,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const vector<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -680,9 +685,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] branch 1.8.x updated (bb32bf8 -> f8b8f1e)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.8.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from bb32bf8 Added test for fetch from repositories not providing scope/service. new cdd3e29 Erased `Info` struct before unmounting volumes in Docker volume isolator. new f8b8f1e Added MESOS-10126 to the 1.8.2 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG | 1 + .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 8 insertions(+), 5 deletions(-)
[mesos] branch master updated: Added MESOS-10126 to the 1.8.2 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new de6cb43 Added MESOS-10126 to the 1.8.2 CHANGELOG. de6cb43 is described below commit de6cb4359186f3ac1bc2c8abe6d55f26a2407a2c Author: Qian Zhang AuthorDate: Fri May 29 16:38:04 2020 +0800 Added MESOS-10126 to the 1.8.2 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 857556f..854e53a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -502,6 +502,7 @@ Release Notes - Mesos - Version 1.8.2 (WIP) * [MESOS-9968] - WWWAuthenticate header parsing fails when commas are in (quoted) realm * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. * [MESOS-10015] - updateAllocation() can stall the allocator with a huge number of reservations on an agent. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave.
[mesos] 02/02: Added MESOS-10126 to the 1.8.2 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.8.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit f8b8f1e9c0fbf0a3ee769eebcebf0b4e98e0bfa6 Author: Qian Zhang AuthorDate: Fri May 29 16:38:04 2020 +0800 Added MESOS-10126 to the 1.8.2 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 6c5bfd3..537e552 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,6 +14,7 @@ Release Notes - Mesos - Version 1.8.2 (WIP) * [MESOS-9968] - WWWAuthenticate header parsing fails when commas are in (quoted) realm * [MESOS-10007] - Command executor can miss exit status for short-lived commands due to double-reaping. * [MESOS-10015] - updateAllocation() can stall the allocator with a huge number of reservations on an agent. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave.
[mesos] 02/02: Added MESOS-10126 to the 1.9.1 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.9.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 802a50f4902f1f5ca3829dca4a472d8a582f7b9b Author: Qian Zhang AuthorDate: Fri May 29 16:32:17 2020 +0800 Added MESOS-10126 to the 1.9.1 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 59ccef6..ca5e244 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ Release Notes - Mesos - Version 1.9.1 (WIP) * [MESOS-10094] - Master's agent draining VLOG prints incorrect task counts. * [MESOS-10096] - Reactivating a draining agent leaves the agent in draining state. * [MESOS-10118] - Agent incorrectly handles draining when empty. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave.
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.9.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit dcce73d57b4d8866fedb3f287d978a135616afb3 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, even though the volume is not actually being used by any container. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents, since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index e4a19c4..6545eaa 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -658,6 +658,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -671,8 +678,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const vector<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -697,9 +702,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] branch master updated: Added MESOS-10126 to the 1.9.1 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 2b3e831 Added MESOS-10126 to the 1.9.1 CHANGELOG. 2b3e831 is described below commit 2b3e83141ef7cf8b8cac1354ccafa1a2c19a70d0 Author: Qian Zhang AuthorDate: Fri May 29 16:32:17 2020 +0800 Added MESOS-10126 to the 1.9.1 CHANGELOG. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 846c5d8..857556f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -224,6 +224,7 @@ Release Notes - Mesos - Version 1.9.1 (WIP) * [MESOS-10094] - Master's agent draining VLOG prints incorrect task counts. * [MESOS-10096] - Reactivating a draining agent leaves the agent in draining state. * [MESOS-10118] - Agent incorrectly handles draining when empty. + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation ** Improvement * [MESOS-9889] - Master CPU high due to unexpected foreachkey behaviour in Master::__reregisterSlave.
[mesos] branch 1.9.x updated (b3b6dbb -> 802a50f)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.9.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from b3b6dbb Added test for reactivation of a disconnected drained agent. new dcce73d Erased `Info` struct before unmounting volumes in Docker volume isolator. new 802a50f Added MESOS-10126 to the 1.9.1 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG | 1 + .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 8 insertions(+), 5 deletions(-)
[mesos] branch master updated: Added MESOS-10126 to the 1.10.1 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new a3bd7d7 Added MESOS-10126 to the 1.10.1 CHANGELOG. a3bd7d7 is described below commit a3bd7d7c034322ca73046345911df90836b77815 Author: Qian Zhang AuthorDate: Fri May 29 16:19:05 2020 +0800 Added MESOS-10126 to the 1.10.1 CHANGELOG. --- CHANGELOG | 8 1 file changed, 8 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 945048d..846c5d8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Release Notes - Mesos - Version 1.10.1 (WIP) +--- +* This is a bug fix release. + +** Bug + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation + + Release Notes - Mesos - Version 1.10.0 This release contains the following highlights:
[mesos] branch 1.10.x updated (c8a1224 -> 365ebb9)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch 1.10.x in repository https://gitbox.apache.org/repos/asf/mesos.git. from c8a1224 Updated Mesos version to 1.10.1. new 97251a9 Erased `Info` struct before unmounting volumes in Docker volume isolator. new 365ebb9 Added MESOS-10126 to the 1.10.1 CHANGELOG. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: CHANGELOG | 8 .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 2 files changed, 15 insertions(+), 5 deletions(-)
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.10.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 97251a90d3336bd628c82becca00f545d95b01aa Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, even though the volume is not actually being used by any container. That means we may never get a chance to unmount this volume on this agent; furthermore, if it is an EBS volume, it cannot be used by any tasks launched on any other agents, since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase the container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index c547696..2f2f624 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -658,6 +658,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID<DockerVolumeIsolatorProcess>(this), @@ -671,8 +678,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const vector<Future<Nothing>>& futures) { - CHECK(infos.contains(containerId)); - vector<string> messages; foreach (const Future<Nothing>& future, futures) { if (!future.isReady()) { @@ -697,9 +702,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] 02/02: Added MESOS-10126 to the 1.10.1 CHANGELOG.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch 1.10.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 365ebb9fb8eebdfaa809dc87a33be22df89a8e85 Author: Qian Zhang AuthorDate: Fri May 29 16:19:05 2020 +0800 Added MESOS-10126 to the 1.10.1 CHANGELOG. --- CHANGELOG | 8 1 file changed, 8 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 945048d..846c5d8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Release Notes - Mesos - Version 1.10.1 (WIP) +--- +* This is a bug fix release. + +** Bug + * [MESOS-10126] - Docker volume isolator needs to clean up the `info` struct regardless the result of unmount operation + + Release Notes - Mesos - Version 1.10.0 This release contains the following highlights:
[mesos] branch master updated (5a04a16 -> b7c3da5)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from 5a04a16 Added a test for os::memory. new 2845330 Erased `Info` struct before unmounting volumes in Docker volume isolator. new b7c3da5 Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../mesos/isolators/docker/volume/isolator.cpp | 12 +- .../containerizer/docker_volume_isolator_tests.cpp | 188 + 2 files changed, 195 insertions(+), 5 deletions(-)
[mesos] 01/02: Erased `Info` struct before unmounting volumes in Docker volume isolator.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 2845330fbd78a80fb7e71c6101724655fa254392 Author: Qian Zhang AuthorDate: Fri May 15 10:23:51 2020 +0800 Erased `Info` struct before unmounting volumes in Docker volume isolator. Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will unmount the volume first, and if the unmount operation fails we will NOT erase the container's `Info` struct from `infos`. This is problematic because the remaining `Info` in `infos` will cause the reference count of the volume to be greater than 0, but actually the volume is not being used by any containers. That means we may never get a chance to unmount this volume on this agent, furthermore if it is an EBS volume, it cannot be used by any tasks launched on any other agents since an EBS volume can only be attached to one node at a time. The only workaround would be to manually unmount the volume. So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase container's `Info` struct before unmounting volumes. Review: https://reviews.apache.org/r/72516 --- .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp index c547696..2f2f624 100644 --- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp +++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp @@ -658,6 +658,13 @@ Future DockerVolumeIsolatorProcess::cleanup( futures.push_back(this->unmount(volume.driver(), volume.name())); } + // Erase the `Info` struct of this container before unmounting the volumes. 
+ // This is to ensure the reference count of the volume will not be wrongly + // increased if unmounting volumes fail, otherwise next time when another + // container using the same volume is destroyed, we would NOT unmount the + // volume since its reference count would be larger than 1. + infos.erase(containerId); + return await(futures) .then(defer( PID(this), @@ -671,8 +678,6 @@ Future DockerVolumeIsolatorProcess::_cleanup( const ContainerID& containerId, const vector>& futures) { - CHECK(infos.contains(containerId)); - vector messages; foreach (const Future& future, futures) { if (!future.isReady()) { @@ -697,9 +702,6 @@ Future DockerVolumeIsolatorProcess::_cleanup( LOG(INFO) << "Removed the checkpoint directory at '" << containerDir << "' for container " << containerId; - // Remove all this container's docker volume information from infos. - infos.erase(containerId); - return Nothing(); }
[mesos] 02/02: Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit b7c3da5a28fb46b4517d52872aec504fff098967 Author: Qian Zhang AuthorDate: Sun May 17 23:30:38 2020 +0800 Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`. Review: https://reviews.apache.org/r/72523 --- .../containerizer/docker_volume_isolator_tests.cpp | 188 + 1 file changed, 188 insertions(+) diff --git a/src/tests/containerizer/docker_volume_isolator_tests.cpp b/src/tests/containerizer/docker_volume_isolator_tests.cpp index 88d0dc7..ef58f7d 100644 --- a/src/tests/containerizer/docker_volume_isolator_tests.cpp +++ b/src/tests/containerizer/docker_volume_isolator_tests.cpp @@ -43,6 +43,8 @@ namespace slave = mesos::internal::slave; +using process::Clock; +using process::Failure; using process::Future; using process::Owned; @@ -1675,6 +1677,192 @@ TEST_F(DockerVolumeIsolatorTest, driver.join(); } + +// This test verifies that unmount operation can be still invoked for +// a docker volume even the previous unmount operation for the same +// docker volume failed. This is a regression test for MESOS-10126. 
+TEST_F(DockerVolumeIsolatorTest, ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure) +{ + Clock::pause(); + + master::Flags masterFlags = CreateMasterFlags(); + + Try> master = StartMaster(masterFlags); + ASSERT_SOME(master); + + slave::Flags flags = CreateSlaveFlags(); + flags.docker_volume_checkpoint_dir = path::join(os::getcwd(), "checkpoint"); + + MockDockerVolumeDriverClient* mockClient = new MockDockerVolumeDriverClient; + + Try> containerizer = +createContainerizer(flags, Owned(mockClient)); + + ASSERT_SOME(containerizer); + + Owned detector = master.get()->createDetector(); + + Try> slave = StartSlave( + detector.get(), + containerizer->get(), + flags); + + ASSERT_SOME(slave); + + MockScheduler sched; + + MesosSchedulerDriver driver( + &sched, + DEFAULT_FRAMEWORK_INFO, + master.get()->pid, + DEFAULT_CREDENTIAL); + + EXPECT_CALL(sched, registered(&driver, _, _)); + + Future> offers1; + EXPECT_CALL(sched, resourceOffers(&driver, _)) +.WillOnce(FutureArg<1>(&offers1)); + + driver.start(); + + Clock::advance(masterFlags.allocation_interval); + + AWAIT_READY(offers1); + ASSERT_FALSE(offers1->empty()); + + const Offer& offer1 = offers1.get()[0]; + + // Create a docker volume with relative path. + const string volumeDriver = "driver"; + const string volumeName = "name"; + const string containerPath = "tmp/foo"; + + Volume volume = createDockerVolume(volumeDriver, volumeName, containerPath); + + // Launch the first task with the docker volume. + TaskInfo task1 = createTask( + offer1.slave_id(), + offer1.resources(), + "test -f " + containerPath + "/file"); + + ContainerInfo containerInfo; + containerInfo.set_type(ContainerInfo::MESOS); + containerInfo.add_volumes()->CopyFrom(volume); + + task1.mutable_container()->CopyFrom(containerInfo); + + // Create mount point for the volume. 
+ const string mountPoint = path::join(os::getcwd(), "volume"); + ASSERT_SOME(os::mkdir(mountPoint)); + ASSERT_SOME(os::touch(path::join(mountPoint, "file"))); + + Future mountName1; + EXPECT_CALL(*mockClient, mount(volumeDriver, _, _)) +.WillOnce(DoAll(FutureArg<1>(&mountName1), +Return(mountPoint))); + + // Simulate an unmount failure. + Future unmountName1; + EXPECT_CALL(*mockClient, unmount(volumeDriver, _)) +.WillOnce(DoAll(FutureArg<1>(&unmountName1), +Return(Failure("Mock failure")))); + + Future statusStarting1; + Future statusRunning1; + Future statusFinished1; + + EXPECT_CALL(sched, statusUpdate(&driver, _)) +.WillOnce(FutureArg<1>(&statusStarting1)) +.WillOnce(FutureArg<1>(&statusRunning1)) +.WillOnce(FutureArg<1>(&statusFinished1)); + + Future> offers2; + EXPECT_CALL(sched, resourceOffers(&driver, _)) +.WillOnce(FutureArg<1>(&offers2)) +.WillRepeatedly(Return()); + + driver.launchTasks(offer1.id(), {task1}); + + AWAIT_READY(statusStarting1); + EXPECT_EQ(TASK_STARTING, statusStarting1->state()); + + AWAIT_READY(statusRunning1); + EXPECT_EQ(TASK_RUNNING, statusRunning1->state()); + + // Make sure the docker volume mount parameters are same with the + // parameters in `containerInfo`. + AWAIT_EXPECT_EQ(volumeName, mountName1); + + AWAIT_READY(statusFinished1); + EXPECT_EQ(TASK_FINISHED, statusFinished1->state()); + + Clock::resume(); + + // Make sure the docker volume unmount parameters are same with + // the parameters in `containerInfo`. + AWAIT_EXPECT_EQ(volumeName, unm
[mesos] branch master updated (6bb60a4 -> 95b8064)
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git. from 6bb60a4 Reverted the changes about `REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED`. new 0a9d97f Added `cpus_soft_limit` field to `ResourceStatistics` protobuf message. new ae956f9 Updated UCR's `usage()` method to support resource limits. new 97dc2b0 Updated Docker containerizer by not updating resources for command task. new 929932f Updated Docker containerizer to set Docker container's resource limits. new 95b8064 Updated Docker containerizer's `usage()` to support resource limits. The 5 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: include/mesos/mesos.proto | 9 +- include/mesos/v1/mesos.proto | 9 +- include/mesos/values.hpp | 1 + src/common/values.cpp | 6 + src/slave/containerizer/docker.cpp | 350 +++-- src/slave/containerizer/docker.hpp | 20 +- src/slave/containerizer/mesos/containerizer.cpp| 119 ++- src/slave/containerizer/mesos/containerizer.hpp| 7 +- .../containerizer/docker_containerizer_tests.cpp | 182 ++- src/tests/slave_recovery_tests.cpp | 3 +- src/tests/slave_tests.cpp | 6 +- 11 files changed, 425 insertions(+), 287 deletions(-)
[mesos] 02/05: Updated UCR's `usage()` method to support resource limits.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit ae956f9dbd30c712eb1310c2f7c98d11f6d5c993 Author: Qian Zhang AuthorDate: Fri Apr 17 20:53:43 2020 +0800 Updated UCR's `usage()` method to support resource limits. Review: https://reviews.apache.org/r/72399 --- src/slave/containerizer/mesos/containerizer.cpp | 119 src/slave/containerizer/mesos/containerizer.hpp | 7 +- src/tests/slave_recovery_tests.cpp | 3 +- src/tests/slave_tests.cpp | 6 +- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index 6aa4f3f..3c1840c 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -1450,7 +1450,8 @@ Future MesosContainerizerProcess::launch( Owned container(new Container()); container->config = containerConfig; - container->resources = containerConfig.resources(); + container->resourceRequests = containerConfig.resources(); + container->resourceLimits = containerConfig.limits(); container->directory = containerConfig.directory(); // Maintain the 'children' list in the parent's 'Container' struct, @@ -2432,7 +2433,8 @@ Future MesosContainerizerProcess::update( // NOTE: We update container's resources before isolators are updated // so that subsequent containerizer->update can be handled properly. - container->resources = resourceRequests; + container->resourceRequests = resourceRequests; + container->resourceLimits = resourceLimits; // Update each isolator. vector> futures; @@ -2454,12 +2456,11 @@ Future MesosContainerizerProcess::update( } -// Resources are used to set the limit fields in the statistics but -// are optional because they aren't known after recovery until/unless -// update() is called. 
Future _usage( const ContainerID& containerId, -const Option& resources, +const Option& resourceRequests, +const Option>& resourceLimits, +bool enableCfsQuota, const vector>& statistics) { ResourceStatistics result; @@ -2478,17 +2479,76 @@ Future _usage( } } - if (resources.isSome()) { -// Set the resource allocations. -Option mem = resources->mem(); -if (mem.isSome()) { - result.set_mem_limit_bytes(mem->bytes()); + Option cpuRequest, cpuLimit, memLimit; + Option memRequest; + + if (resourceRequests.isSome()) { +cpuRequest = resourceRequests->cpus(); +memRequest = resourceRequests->mem(); + } + + if (resourceLimits.isSome()) { +foreach (auto&& limit, resourceLimits.get()) { + if (limit.first == "cpus") { +cpuLimit = limit.second.value(); + } else if (limit.first == "mem") { +memLimit = limit.second.value(); + } +} + } + + if (cpuRequest.isSome()) { +result.set_cpus_soft_limit(cpuRequest.get()); + } + + if (cpuLimit.isSome()) { +// Get the total CPU numbers of this node, we will use it to set container's +// hard CPU limit if the CPU limit specified by framework is infinity. +static Option totalCPUs; +if (totalCPUs.isNone()) { + Try cpus = os::cpus(); + if (cpus.isError()) { +return Failure( +"Failed to auto-detect the number of cpus: " + cpus.error()); + } + + totalCPUs = cpus.get(); } -Option cpus = resources->cpus(); -if (cpus.isSome()) { - result.set_cpus_limit(cpus.get()); +CHECK_SOME(totalCPUs); + +result.set_cpus_limit( +std::isinf(cpuLimit.get()) ? totalCPUs.get() : cpuLimit.get()); + } else if (enableCfsQuota && cpuRequest.isSome()) { +result.set_cpus_limit(cpuRequest.get()); + } + + if (memRequest.isSome()) { +result.set_mem_soft_limit_bytes(memRequest->bytes()); + } + + if (memLimit.isSome()) { +// Get the total memory of this node, we will use it to set container's hard +// memory limit if the memory limit specified by framework is infinity. 
+static Option totalMem; +if (totalMem.isNone()) { + Try mem = os::memory(); + if (mem.isError()) { +return Failure( +"Failed to auto-detect the size of main memory: " + mem.error()); + } + + totalMem = mem->total; } + +CHECK_SOME(totalMem); + +result.set_mem_limit_bytes( +std::isinf(memLimit.get()) + ? totalMem->bytes() + : Megabytes(static_cast(memLimit.get())).bytes()); + } else if (memRequest.isSome()) { +result.set_mem_limit_bytes(memRequest->bytes()); } return result; @@ -2514,14 +2574,39 @@ Future MesosContainerizerProcess::us
[mesos] 05/05: Updated Docker containerizer's `usage()` to support resource limits.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 95b806474da6f63ec8d50904a4336f903c0c5d08 Author: Qian Zhang AuthorDate: Tue Apr 21 09:30:26 2020 +0800 Updated Docker containerizer's `usage()` to support resource limits. Review: https://reviews.apache.org/r/72402 --- src/slave/containerizer/docker.cpp | 105 +++-- .../containerizer/docker_containerizer_tests.cpp | 25 +++-- 2 files changed, 116 insertions(+), 14 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 8aed025..431f7c6 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -2082,16 +2082,105 @@ Future DockerContainerizerProcess::usage( result = cgroupStats.get(); #endif // __linux__ -// Set the resource allocations. -const Resources& resource = container->resourceRequests; -const Option mem = resource.mem(); -if (mem.isSome()) { - result.set_mem_limit_bytes(mem->bytes()); +Option cpuRequest, cpuLimit, memLimit; +Option memRequest; + +// For command tasks, we should subtract the default resources (0.1 cpus and +// 32MB memory) for command executor from the container's resource requests +// and limits, otherwise we would report wrong resource statistics. 
+if (container->resourceRequests.cpus().isSome()) { + if (container->generatedForCommandTask) { +cpuRequest = + container->resourceRequests.cpus().get() - DEFAULT_EXECUTOR_CPUS; + } else { +cpuRequest = container->resourceRequests.cpus(); + } +} + +if (container->resourceRequests.mem().isSome()) { + if (container->generatedForCommandTask) { +memRequest = + container->resourceRequests.mem().get() - DEFAULT_EXECUTOR_MEM; + } else { +memRequest = container->resourceRequests.mem(); + } +} + +foreach (auto&& limit, container->resourceLimits) { + if (limit.first == "cpus") { +if (container->generatedForCommandTask && +!std::isinf(limit.second.value())) { + cpuLimit = limit.second.value() - DEFAULT_EXECUTOR_CPUS; +} else { + cpuLimit = limit.second.value(); +} + } else if (limit.first == "mem") { +if (container->generatedForCommandTask && +!std::isinf(limit.second.value())) { + memLimit = limit.second.value() - + DEFAULT_EXECUTOR_MEM.bytes() / Bytes::MEGABYTES; +} else { + memLimit = limit.second.value(); +} + } +} + +if (cpuRequest.isSome()) { + result.set_cpus_soft_limit(cpuRequest.get()); +} + +if (cpuLimit.isSome()) { + // Get the total CPU numbers of this node, we will use + // it to set container's hard CPU limit if the CPU limit + // specified by framework is infinity. + static Option totalCPUs; + if (totalCPUs.isNone()) { +Try cpus = os::cpus(); +if (cpus.isError()) { + return Failure( + "Failed to auto-detect the number of cpus: " + cpus.error()); +} + +totalCPUs = cpus.get(); + } + + CHECK_SOME(totalCPUs); + + result.set_cpus_limit( + std::isinf(cpuLimit.get()) ? 
totalCPUs.get() : cpuLimit.get()); +#ifdef __linux__ +} else if (flags.cgroups_enable_cfs && cpuRequest.isSome()) { + result.set_cpus_limit(cpuRequest.get()); +#endif } -const Option cpus = resource.cpus(); -if (cpus.isSome()) { - result.set_cpus_limit(cpus.get()); +if (memLimit.isSome()) { + // Get the total memory of this node, we will use it to + // set container's hard memory limit if the memory limit + // specified by framework is infinity. + static Option totalMem; + if (totalMem.isNone()) { +Try mem = os::memory(); +if (mem.isError()) { + return Failure( + "Failed to auto-detect the size of main memory: " + mem.error()); +} + +totalMem = mem->total; + } + + CHECK_SOME(totalMem); + + result.set_mem_limit_bytes( + std::isinf(memLimit.get()) +? totalMem->bytes() +: Megabytes(static_cast(memLimit.get())).bytes()); + + if (memRequest.isSome()) { +result.set_mem_soft_limit_bytes(memRequest->bytes()); + } +} else if (memRequest.isSome()) { + result.set_mem_limit_bytes(memRequest->bytes()); } return result; diff --git a/src/tests/containerizer/docker_containerizer_tests.cpp b/src/tests/containerizer/docker_containerizer_tests.cpp index 42692dc..fc3a651 100644 --- a/src/tests/containerizer/docker_contain
[mesos] 01/05: Added `cpus_soft_limit` field to `ResourceStatistics` protobuf message.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 0a9d97f801551d273b17d6c4515b4a8884c0be91 Author: Qian Zhang AuthorDate: Fri Apr 17 20:53:03 2020 +0800 Added `cpus_soft_limit` field to `ResourceStatistics` protobuf message. Review: https://reviews.apache.org/r/72398 --- include/mesos/mesos.proto| 9 ++--- include/mesos/v1/mesos.proto | 9 ++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 470343c..5f795f5 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -1759,9 +1759,12 @@ message ResourceStatistics { optional double cpus_user_time_secs = 2; optional double cpus_system_time_secs = 3; - // Number of CPUs allocated. + // Hard CPU limit. optional double cpus_limit = 4; + // Soft CPU limit. + optional double cpus_soft_limit = 45; + // cpu.stat on process throttling (for contention issues). optional uint32 cpus_nr_periods = 7; optional uint32 cpus_nr_throttled = 8; @@ -1779,10 +1782,10 @@ message ResourceStatistics { // Total memory + swap usage. This is set if swap is enabled. optional uint64 mem_total_memsw_bytes = 37; - // Hard memory limit for a container. + // Hard memory limit. optional uint64 mem_limit_bytes = 6; - // Soft memory limit for a container. + // Soft memory limit. optional uint64 mem_soft_limit_bytes = 38; // Broken out memory usage information: pagecache, rss (anonymous), diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index ecf717a..07d2f40 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -1719,9 +1719,12 @@ message ResourceStatistics { optional double cpus_user_time_secs = 2; optional double cpus_system_time_secs = 3; - // Number of CPUs allocated. + // Hard CPU limit. optional double cpus_limit = 4; + // Soft CPU limit. 
+ optional double cpus_soft_limit = 45; + // cpu.stat on process throttling (for contention issues). optional uint32 cpus_nr_periods = 7; optional uint32 cpus_nr_throttled = 8; @@ -1739,10 +1742,10 @@ message ResourceStatistics { // Total memory + swap usage. This is set if swap is enabled. optional uint64 mem_total_memsw_bytes = 37; - // Hard memory limit for a container. + // Hard memory limit. optional uint64 mem_limit_bytes = 6; - // Soft memory limit for a container. + // Soft memory limit. optional uint64 mem_soft_limit_bytes = 38; // Broken out memory usage information: pagecache, rss (anonymous),
[mesos] 04/05: Updated Docker containerizer to set Docker container's resource limits.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 929932fc2bd753f097a26caea5b3e7f7f3ac9118 Author: Qian Zhang AuthorDate: Thu Apr 30 09:29:49 2020 +0800 Updated Docker containerizer to set Docker container's resource limits. This is to ensure the resource limits of Docker container where custom executor runs can be correctly updated when a new task is launched or an existing task terminates. And the `resource` field in the `Container` struct is also renamed to `resourceRequests`. Review: https://reviews.apache.org/r/72391 --- include/mesos/values.hpp | 1 + src/common/values.cpp | 6 + src/slave/containerizer/docker.cpp | 242 - src/slave/containerizer/docker.hpp | 15 ++- 4 files changed, 174 insertions(+), 90 deletions(-) diff --git a/include/mesos/values.hpp b/include/mesos/values.hpp index 27f71d1..9288503 100644 --- a/include/mesos/values.hpp +++ b/include/mesos/values.hpp @@ -27,6 +27,7 @@ namespace mesos { std::ostream& operator<<(std::ostream& stream, const Value::Scalar& scalar); bool operator==(const Value::Scalar& left, const Value::Scalar& right); +bool operator!=(const Value::Scalar& left, const Value::Scalar& right); bool operator<(const Value::Scalar& left, const Value::Scalar& right); bool operator<=(const Value::Scalar& left, const Value::Scalar& right); bool operator>(const Value::Scalar& left, const Value::Scalar& right); diff --git a/src/common/values.cpp b/src/common/values.cpp index 7520382..d7bc91b 100644 --- a/src/common/values.cpp +++ b/src/common/values.cpp @@ -99,6 +99,12 @@ bool operator==(const Value::Scalar& left, const Value::Scalar& right) } +bool operator!=(const Value::Scalar& left, const Value::Scalar& right) +{ + return !(left == right); +} + + bool operator<(const Value::Scalar& left, const Value::Scalar& right) { return convertToFixed(left.value()) < convertToFixed(right.value()); diff --git 
a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 3aa6a99..8aed025 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -541,7 +541,7 @@ Try DockerContainerizerProcess::updatePersistentVolumes( continue; } - if (_container->resources.contains(resource)) { + if (_container->resourceRequests.contains(resource)) { isVolumeInUse = true; break; } @@ -612,7 +612,7 @@ Future DockerContainerizerProcess::mountPersistentVolumes( container->state = Container::MOUNTING; if (!container->containerConfig.has_task_info() && - !container->resources.persistentVolumes().empty()) { + !container->resourceRequests.persistentVolumes().empty()) { LOG(ERROR) << "Persistent volumes found with container '" << containerId << "' but are not supported with custom executors"; return Nothing(); @@ -622,7 +622,7 @@ Future DockerContainerizerProcess::mountPersistentVolumes( containerId, container->containerWorkDir, Resources(), - container->resources); + container->resourceRequests); if (updateVolumes.isError()) { return Failure(updateVolumes.error()); @@ -1333,7 +1333,10 @@ Future DockerContainerizerProcess::_launch( // --cpu-quota to the 'docker run' call in // launchExecutorContainer. return update( - containerId, containerConfig.executor_info().resources(), {}, true) + containerId, + containerConfig.executor_info().resources(), + containerConfig.limits(), + true) .then([=]() { return Future(dockerContainer); }); @@ -1384,7 +1387,7 @@ Future DockerContainerizerProcess::launchExecutorContainer( containerName, container->containerWorkDir, flags.sandbox_directory, -container->resources, +container->resourceRequests, #ifdef __linux__ flags.cgroups_enable_cfs, #else @@ -1392,8 +1395,8 @@ Future DockerContainerizerProcess::launchExecutorContainer( #endif container->environment, None(), // No extra devices. -flags.docker_mesos_image.isNone() ? flags.default_container_dns : None() -); +flags.docker_mesos_image.isNone() ? 
flags.default_container_dns : None(), +container->resourceLimits); if (runOptions.isError()) { return Failure(runOptions.error()); @@ -1516,7 +1519,7 @@ Future DockerContainerizerProcess::launchExecutorProcess( Future allocateGpus = Nothing(); #ifdef __linux__ - Option gpus = Resources(container->resour
[mesos] 03/05: Updated Docker containerizer by not updating resources for command task.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 97dc2b069965929105d6241c57f8cb6ee77a5e35 Author: Qian Zhang AuthorDate: Tue Apr 21 16:46:17 2020 +0800 Updated Docker containerizer by not updating resources for command task. For command task, its resources will be set when it is launched as a Docker container by Docker executor, and we do not need to update its resources afterward since we do not support task resizing. But for the case that a custom executor launched as a Docker container by Docker containerizer, we need to update its resources when it launches a new task or an existing task terminates. Review: https://reviews.apache.org/r/72401 --- src/slave/containerizer/docker.cpp | 11 ++ src/slave/containerizer/docker.hpp | 5 +- .../containerizer/docker_containerizer_tests.cpp | 157 - 3 files changed, 15 insertions(+), 158 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 492ac27..3aa6a99 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -1007,6 +1007,7 @@ Future DockerContainerizerProcess::_recover( Container* container = new Container(containerId); containers_[containerId] = container; container->state = Container::RUNNING; +container->generatedForCommandTask = executor.generatedForCommandTask; container->launchesExecutorContainer = executorContainers.contains(containerId); @@ -1675,6 +1676,16 @@ Future DockerContainerizerProcess::update( return Nothing(); } + if (container->generatedForCommandTask) { +LOG(INFO) << "Ignoring updating container " << containerId + << " because it is generated for a command task"; + +// Store the resources for usage(). 
+container->resources = resourceRequests; + +return Nothing(); + } + if (container->resources == resourceRequests && !force) { LOG(INFO) << "Ignoring updating container " << containerId << " because resources passed to update are identical to" diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp index 09fc279..d3d5f3a 100644 --- a/src/slave/containerizer/docker.hpp +++ b/src/slave/containerizer/docker.hpp @@ -353,7 +353,8 @@ private: symlinked(symlinked), containerWorkDir(containerWorkDir), containerName(name(id)), -launchesExecutorContainer(launchesExecutorContainer) +launchesExecutorContainer(launchesExecutorContainer), +generatedForCommandTask(_containerConfig.has_task_info()) { // NOTE: The task's resources are included in the executor's // resources in order to make sure when launching the executor @@ -531,6 +532,8 @@ private: // Marks if this container launches an executor in a docker // container. bool launchesExecutorContainer; + +bool generatedForCommandTask; }; hashmap containers_; diff --git a/src/tests/containerizer/docker_containerizer_tests.cpp b/src/tests/containerizer/docker_containerizer_tests.cpp index b069f51..42692dc 100644 --- a/src/tests/containerizer/docker_containerizer_tests.cpp +++ b/src/tests/containerizer/docker_containerizer_tests.cpp @@ -1086,163 +1086,6 @@ TEST_F(DockerContainerizerTest, ROOT_DOCKER_Usage) } -#ifdef __linux__ -TEST_F(DockerContainerizerTest, ROOT_DOCKER_Update) -{ - Try> master = StartMaster(); - ASSERT_SOME(master); - - MockDocker* mockDocker = -new MockDocker(tests::flags.docker, tests::flags.docker_socket); - - Shared docker(mockDocker); - - slave::Flags flags = CreateSlaveFlags(); - - Fetcher fetcher(flags); - - Try logger = -ContainerLogger::create(flags.container_logger); - - ASSERT_SOME(logger); - - MockDockerContainerizer dockerContainerizer( - flags, - &fetcher, - Owned(logger.get()), - docker); - - Owned detector = master.get()->createDetector(); - - Try> slave = 
-StartSlave(detector.get(), &dockerContainerizer, flags); - ASSERT_SOME(slave); - - MockScheduler sched; - MesosSchedulerDriver driver( - &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); - - Future frameworkId; - EXPECT_CALL(sched, registered(&driver, _, _)) -.WillOnce(FutureArg<1>(&frameworkId)); - - Future> offers; - EXPECT_CALL(sched, resourceOffers(&driver, _)) -.WillOnce(FutureArg<1>(&offers)) -.WillRepeatedly(Return()); // Ignore subsequent offers. - - driver.start(); - - AWAIT_READY(frameworkId); - - AWAIT_READY(offers); - ASSERT_FALSE(offers->empty()); - - TaskInfo task =
[mesos] branch master updated: Reverted the changes about `REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED`.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 6bb60a4 Reverted the changes about `REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED`. 6bb60a4 is described below commit 6bb60a4869394f663a09370016127ae8688cbe06 Author: Qian Zhang AuthorDate: Mon Apr 27 22:34:51 2020 +0800 Reverted the changes about `REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED`. The method `MemorySubsystemProcess::oomWaited()` will only be invoked when the container is OOM killed because it uses more memory than its hard memory limit (i.e., the task status reason `REASON_CONTAINER_LIMITATION_MEMORY`), it will NOT be invoked when a burstable container is OOM killed because the agent host is running out of memory, i.e., we will NOT receive OOM killing notification via cgroups notification API for this case. So it is not possible for Mesos to provide a task status reason `REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED` for this case. 
Review: https://reviews.apache.org/r/72442 --- include/mesos/mesos.proto | 1 - include/mesos/v1/mesos.proto | 1 - src/common/protobuf_utils.cpp | 3 +-- .../mesos/isolators/cgroups/subsystems/memory.cpp | 24 +- 4 files changed, 2 insertions(+), 27 deletions(-) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 9412ed7..470343c 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -2643,7 +2643,6 @@ message TaskStatus { REASON_CONTAINER_LIMITATION = 19; REASON_CONTAINER_LIMITATION_DISK = 20; REASON_CONTAINER_LIMITATION_MEMORY = 8; -REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED = 35; REASON_CONTAINER_PREEMPTED = 17; REASON_CONTAINER_UPDATE_FAILED = 22; REASON_MAX_COMPLETION_TIME_REACHED = 33; diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index 194c42c..ecf717a 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -2632,7 +2632,6 @@ message TaskStatus { REASON_CONTAINER_LIMITATION = 19; REASON_CONTAINER_LIMITATION_DISK = 20; REASON_CONTAINER_LIMITATION_MEMORY = 8; -REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED = 35; REASON_CONTAINER_PREEMPTED = 17; REASON_CONTAINER_UPDATE_FAILED = 22; REASON_MAX_COMPLETION_TIME_REACHED = 33; diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp index 8d1d5c4..723d85a 100644 --- a/src/common/protobuf_utils.cpp +++ b/src/common/protobuf_utils.cpp @@ -254,8 +254,7 @@ StatusUpdate createStatusUpdate( CHECK( reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION || reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK || -reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY || -reason.get() == TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED) +reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) << reason.get(); status->mutable_limitation()->mutable_resources()->CopyFrom( diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp 
b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp index 60c7a89..15f87ba 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp @@ -699,33 +699,11 @@ void MemorySubsystemProcess::oomWaited( ? (double) usage->bytes() / Bytes::MEGABYTES : 0), "*").get(); - TaskStatus::Reason reason = TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY; - - // If the container has a hard limit set higher than the soft limit, then - // check if the memory usage is above the soft limit but less than the hard - // limit. If so, we send a task status reason to the scheduler which indicates - // that this container was preferentially OOM-killed because it exceeded its - // memory request without hitting its memory limit. - Try softLimit = -cgroups::memory::soft_limit_in_bytes(hierarchy, cgroup); - - if (softLimit.isError()) { -LOG(ERROR) << "Failed to read 'memory.soft_limit_in_bytes': " - << softLimit.error(); - } else if (softLimit.get() < limit.get()) { -if (!usage.isError() && -!limit.isError() && -usage.get() > softLimit.get() && -usage.get() < limit.get()) { - reason = TaskStatus::REASON_CONTAINER_MEMORY_REQUEST_EXCEEDED; -} - } - infos[containerId]->limitation.set( protobuf::slave::createContainerLimitation( mem, message.str(), - reason)); + TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)); }
[mesos] branch master updated: Set OOM score adj when Docker container's memory limit is infinite.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new 68ce147 Set OOM score adj when Docker container's memory limit is infinite. 68ce147 is described below commit 68ce1476aebe10db7107c0f3dc813af78ec20cef Author: Qian Zhang AuthorDate: Mon Apr 27 14:14:15 2020 +0800 Set OOM score adj when Docker container's memory limit is infinite. Review: https://reviews.apache.org/r/72435 --- src/docker/docker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/docker/docker.cpp b/src/docker/docker.cpp index a5f15dd..8ad5833 100644 --- a/src/docker/docker.cpp +++ b/src/docker/docker.cpp @@ -700,7 +700,8 @@ Try Docker::RunOptions::create( if (memRequest.isSome()) { options.memoryReservation = std::max(memRequest.get(), MIN_MEMORY); - if (memRequest.get() < Megabytes(static_cast(memLimit.get( { + if (std::isinf(memLimit.get()) || + memRequest.get() < Megabytes(static_cast(memLimit.get( { Try oomScoreAdj = calculateOOMScoreAdj(memRequest.get()); if (oomScoreAdj.isError()) { return Error(
[mesos] branch master updated: Avoided error in Mesos logs upon docker image fetch
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git The following commit(s) were added to refs/heads/master by this push: new e6ba06c Avoided error in Mesos logs upon docker image fetch e6ba06c is described below commit e6ba06cdf449c7c23904c63df728b78d2fbb9678 Author: Grégoire Seux AuthorDate: Wed Mar 25 07:34:34 2020 +0000 Avoided error in Mesos logs upon docker image fetch When fetching a Docker image, the fetcher tests whether curl supports the `--http1.1` flag. This patch makes sure stderr is properly redirected so that curl's warnings do not show up in the Mesos logs. This closes #354 --- src/uri/fetchers/docker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uri/fetchers/docker.cpp b/src/uri/fetchers/docker.cpp index e47d9d9..1256294 100644 --- a/src/uri/fetchers/docker.cpp +++ b/src/uri/fetchers/docker.cpp @@ -114,7 +114,7 @@ static Future curl( // infrastructures. The '--http1.1' flag got added to curl with // with version 7.33.0. Some supported distributions do still come // with curl version 7.19.0. See MESOS-8907. -http11 = os::system("curl --http1.1 -V 2>&1 >/dev/null") == 0; +http11 = os::system("curl --http1.1 -V > /dev/null 2>&1") == 0; VLOG(1) << "Curl accepts --http1.1 flag: " << stringify(http11); initialized->done(); }
[mesos] 08/21: Set container process's OOM score adjust.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 8d51df87b058144d0ce51008e393b6261b6e9765 Author: Qian Zhang AuthorDate: Thu Jan 2 09:05:43 2020 +0800 Set container process's OOM score adjust. Review: https://reviews.apache.org/r/71944 --- .../mesos/isolators/cgroups/cgroups.cpp| 8 +- .../mesos/isolators/cgroups/subsystem.cpp | 9 ++- .../mesos/isolators/cgroups/subsystem.hpp | 7 +- .../mesos/isolators/cgroups/subsystems/devices.cpp | 3 +- .../mesos/isolators/cgroups/subsystems/devices.hpp | 3 +- .../mesos/isolators/cgroups/subsystems/memory.cpp | 87 +- .../mesos/isolators/cgroups/subsystems/memory.hpp | 14 +++- .../mesos/isolators/cgroups/subsystems/net_cls.cpp | 3 +- .../mesos/isolators/cgroups/subsystems/net_cls.hpp | 3 +- .../isolators/cgroups/subsystems/perf_event.cpp| 3 +- .../isolators/cgroups/subsystems/perf_event.hpp| 3 +- src/slave/containerizer/mesos/utils.cpp| 20 + src/slave/containerizer/mesos/utils.hpp| 3 + 13 files changed, 148 insertions(+), 18 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp index 8e858f4..4193538 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp @@ -484,7 +484,8 @@ Future> CgroupsIsolatorProcess::prepare( infos[containerId]->subsystems.insert(subsystem->name()); prepares.push_back(subsystem->prepare( containerId, - infos[containerId]->cgroup)); + infos[containerId]->cgroup, + containerConfig)); } // Chown the cgroup so the executor or a nested container whose @@ -705,10 +706,7 @@ Future CgroupsIsolatorProcess::isolate( // containers with shared cgroups, because we don't call `prepare()`, // `recover()`, or `cleanup()` on them either. 
If we were to call // `isolate()` on them, the call would likely fail because the subsystem - // doesn't know about the container. This is currently OK because - // the only cgroup isolator that even implements `isolate()` is the - // `NetClsSubsystem` and it doesn't do anything with the `pid` - // passed in. + // doesn't know about the container. // // TODO(klueska): In the future we should revisit this to make // sure that doing things this way is sufficient (or otherwise diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystem.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystem.cpp index d9c8fa7..6393bee 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystem.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystem.cpp @@ -116,13 +116,15 @@ Future Subsystem::recover( Future Subsystem::prepare( const ContainerID& containerId, -const string& cgroup) +const string& cgroup, +const mesos::slave::ContainerConfig& containerConfig) { return process::dispatch( process.get(), &SubsystemProcess::prepare, containerId, - cgroup); + cgroup, + containerConfig); } @@ -221,7 +223,8 @@ Future SubsystemProcess::recover( Future SubsystemProcess::prepare( const ContainerID& containerId, -const string& cgroup) +const string& cgroup, +const mesos::slave::ContainerConfig& containerConfig) { return Nothing(); } diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystem.hpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystem.hpp index 088d417..7d33901 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/subsystem.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystem.hpp @@ -90,11 +90,13 @@ public: * * @param containerId The target containerId. * @param cgroup The target cgroup. + * @param containerConfig The container configuration. * @return Nothing or an error if `prepare` fails. 
*/ process::Future prepare( const ContainerID& containerId, - const std::string& cgroup); + const std::string& cgroup, + const mesos::slave::ContainerConfig& containerConfig); /** * Isolate the associated container to cgroups subsystem. @@ -198,7 +200,8 @@ public: virtual process::Future prepare( const ContainerID& containerId, - const std::string& cgroup); + const std::string& cgroup, + const mesos::slave::ContainerConfig& containerConfig); virtual process::Future isolate( const ContainerID& containerId, diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/devices.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/devices.cpp index a
[mesos] 01/21: Added patch for RapidJSON.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 0b47b43d290494fc1c6a6f6241ddfbceeb686997 Author: Qian Zhang AuthorDate: Sun Feb 23 09:53:32 2020 +0800 Added patch for RapidJSON. This commit updates the writer of RapidJSON to write infinite floating point numbers as "Infinity" and "-Infinity" (i.e., with double quotes) rather than Infinity and -Infinity. This is to ensure the strings converted from JSON objects conform to the rule defined by Protobuf: https://developers.google.com/protocol-buffers/docs/proto3#json Review: https://reviews.apache.org/r/72161 --- 3rdparty/CMakeLists.txt| 3 +++ 3rdparty/Makefile.am | 8 3rdparty/rapidjson-1.1.0.patch | 45 ++ 3 files changed, 56 insertions(+) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index c45d742..119813e 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -479,9 +479,12 @@ target_include_directories( rapidjson INTERFACE ${RAPIDJSON_ROOT}/include) +PATCH_CMD(RAPIDJSON_PATCH_CMD rapidjson-${RAPIDJSON_VERSION}.patch) + ExternalProject_Add( ${RAPIDJSON_TARGET} PREFIX${RAPIDJSON_CMAKE_ROOT} + PATCH_COMMAND ${RAPIDJSON_PATCH_CMD} CONFIGURE_COMMAND ${CMAKE_NOOP} BUILD_COMMAND ${CMAKE_NOOP} INSTALL_COMMAND ${CMAKE_NOOP} diff --git a/3rdparty/Makefile.am b/3rdparty/Makefile.am index 243a619..c277627 100644 --- a/3rdparty/Makefile.am +++ b/3rdparty/Makefile.am @@ -127,6 +127,14 @@ EXTRA_DIST += \ EXTRA_DIST += \ $(ZOOKEEPER).patch +# We need to patch RapidJSON to make it write infinite floating point numbers +# as "Infinity" and "-Infinity" (i.e., with double quotes) rather than Infinity +# and -Infinity. 
This is to ensure the strings converted from JSON objects +# conform to the rule defined by Protobuf: +# https://developers.google.com/protocol-buffers/docs/proto3#json +EXTRA_DIST += \ + $(RAPIDJSON).patch + # We need the following patches for CMake and/or Windows builds. EXTRA_DIST += \ $(BOOST).patch \ diff --git a/3rdparty/rapidjson-1.1.0.patch b/3rdparty/rapidjson-1.1.0.patch new file mode 100644 index 000..8e655d9 --- /dev/null +++ b/3rdparty/rapidjson-1.1.0.patch @@ -0,0 +1,45 @@ +diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h +index 94f22dd5..ac522f6b 100644 +--- a/include/rapidjson/writer.h b/include/rapidjson/writer.h +@@ -324,13 +324,17 @@ protected: + return true; + } + if (internal::Double(d).Sign()) { +-PutReserve(*os_, 9); ++PutReserve(*os_, 11); ++PutUnsafe(*os_, '\"'); + PutUnsafe(*os_, '-'); + } +-else +-PutReserve(*os_, 8); ++else { ++PutReserve(*os_, 10); ++PutUnsafe(*os_, '\"'); ++} + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); ++PutUnsafe(*os_, '\"'); + return true; + } + +@@ -518,13 +522,17 @@ inline bool Writer::WriteDouble(double d) { + return true; + } + if (internal::Double(d).Sign()) { +-PutReserve(*os_, 9); ++PutReserve(*os_, 11); ++PutUnsafe(*os_, '\"'); + PutUnsafe(*os_, '-'); + } +-else +-PutReserve(*os_, 8); ++else { ++PutReserve(*os_, 10); ++PutUnsafe(*os_, '\"'); ++} + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); ++PutUnsafe(*os_, '\"'); + return true; + }
[mesos] 17/21: Set resource limits and OOM score adjustment in Docker executor.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 9ba0866b6c8033f627a4a15a6727934bb8178de4 Author: Qian Zhang AuthorDate: Fri Jan 17 15:20:19 2020 +0800 Set resource limits and OOM score adjustment in Docker executor. Review: https://reviews.apache.org/r/72022 --- src/docker/docker.cpp | 103 ++-- src/docker/docker.hpp | 13 +- src/docker/executor.cpp | 4 +- 3 files changed, 96 insertions(+), 24 deletions(-) diff --git a/src/docker/docker.cpp b/src/docker/docker.cpp index 04fb8d0..a5f15dd 100644 --- a/src/docker/docker.cpp +++ b/src/docker/docker.cpp @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -54,6 +55,8 @@ #include "linux/cgroups.hpp" #endif // __linux__ +#include "slave/containerizer/mesos/utils.hpp" + #include "slave/containerizer/mesos/isolators/cgroups/constants.hpp" #include "slave/constants.hpp" @@ -625,11 +628,12 @@ Try Docker::RunOptions::create( const string& name, const string& sandboxDirectory, const string& mappedDirectory, -const Option& resources, +const Option& resourceRequests, bool enableCfsQuota, const Option>& env, const Option>& devices, -const Option& defaultContainerDNS) +const Option& defaultContainerDNS, +const Option>& resourceLimits) { if (!containerInfo.has_docker()) { return Error("No docker info found in container info"); @@ -640,26 +644,75 @@ Try Docker::RunOptions::create( RunOptions options; options.privileged = dockerInfo.privileged(); - if (resources.isSome()) { -// TODO(yifan): Support other resources (e.g. disk). 
-Option cpus = resources->cpus(); -if (cpus.isSome()) { - options.cpuShares = std::max( - static_cast(CPU_SHARES_PER_CPU * cpus.get()), - MIN_CPU_SHARES); + Option cpuRequest, cpuLimit, memLimit; + Option memRequest; - if (enableCfsQuota) { -const Duration quota = - std::max(CPU_CFS_PERIOD * cpus.get(), MIN_CPU_CFS_QUOTA); + if (resourceRequests.isSome()) { +// TODO(yifan): Support other resources (e.g. disk). +cpuRequest = resourceRequests->cpus(); +memRequest = resourceRequests->mem(); + } -options.cpuQuota = static_cast(quota.us()); + if (resourceLimits.isSome()) { +foreach (auto&& limit, resourceLimits.get()) { + if (limit.first == "cpus") { +cpuLimit = limit.second.value(); + } else if (limit.first == "mem") { +memLimit = limit.second.value(); } } + } + + if (cpuRequest.isSome()) { +options.cpuShares = std::max( +static_cast(CPU_SHARES_PER_CPU * cpuRequest.get()), +MIN_CPU_SHARES); + } + + // Set the `--cpu-quota` option to CPU limit (if it is not an infinite + // value) or to CPU request if the flag `--cgroups_enable_cfs` is true. + // If CPU limit is infinite, `--cpu-quota` will not be set at all which + // means the Docker container will run with infinite CPU quota. + if (cpuLimit.isSome()) { +if (!std::isinf(cpuLimit.get())) { + const Duration quota = +std::max(CPU_CFS_PERIOD * cpuLimit.get(), MIN_CPU_CFS_QUOTA); + + options.cpuQuota = static_cast(quota.us()); +} + } else if (enableCfsQuota && cpuRequest.isSome()) { +const Duration quota = + std::max(CPU_CFS_PERIOD * cpuRequest.get(), MIN_CPU_CFS_QUOTA); + +options.cpuQuota = static_cast(quota.us()); + } + + // Set the `--memory` option to memory limit (if it is not an infinite + // value) or to memory request. If memory limits is infinite, `--memory` + // will not be set at all which means the Docker container will run with + // infinite memory limit. 
+ if (memLimit.isSome()) { +if (!std::isinf(memLimit.get())) { + options.memory = +std::max(Megabytes(static_cast(memLimit.get())), MIN_MEMORY); +} + +if (memRequest.isSome()) { + options.memoryReservation = std::max(memRequest.get(), MIN_MEMORY); + + if (memRequest.get() < Megabytes(static_cast(memLimit.get( { +Try oomScoreAdj = calculateOOMScoreAdj(memRequest.get()); +if (oomScoreAdj.isError()) { + return Error( + "Failed to calculate OOM score adjustment: " + + oomScoreAdj.error()); +} -Option mem = resources->mem(); -if (mem.isSome()) { - options.memory = std::max(mem.get(), MIN_MEMORY); +options.oomScoreAdj = oomScoreAdj.get(); + } } + } else if (memRequest.isSome()) { +options.memory = std::max(memRequest.get(), MIN_MEMORY); } if (env.isSome()) {
[mesos] 03/21: Set resource limits when launching executor container.
This is an automated email from the ASF dual-hosted git repository. qianzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git commit 4688371cafed058890cc11c8aa8514db6b08bb2b Author: Qian Zhang AuthorDate: Tue Dec 3 21:37:43 2019 +0800 Set resource limits when launching executor container. Review: https://reviews.apache.org/r/71858 --- src/slave/slave.cpp | 126 src/slave/slave.hpp | 5 +++ 2 files changed, 131 insertions(+) diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index a914de4..f214560 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -188,6 +188,11 @@ static CommandInfo defaultExecutorCommandInfo( const Option& user); +// Sets the executor resource limit (the `limit` parameter) based on the resource +// passed in (the `value` parameter). +static void setLimit(Option& limit, const Value::Scalar& value); + + Slave::Slave(const string& id, const slave::Flags& _flags, MasterDetector* _detector, @@ -3255,6 +3260,7 @@ void Slave::__run( lambda::_1, frameworkId, executorInfo_, + computeExecutorLimits(executorInfo.resources(), tasks), taskGroup.isNone() ? 
task.get() : Option::none())); } @@ -3635,6 +3641,7 @@ void Slave::launchExecutor( const Future>& authenticationToken, const FrameworkID& frameworkId, const ExecutorInfo& executorInfo, +const google::protobuf::Map& executorLimits, const Option& taskInfo) { Framework* framework = getFramework(frameworkId); @@ -3716,6 +3723,10 @@ void Slave::launchExecutor( *containerConfig.mutable_resources() = executorInfo.resources(); containerConfig.set_directory(executor->directory); + if (!executorLimits.empty()) { +*containerConfig.mutable_limits() = executorLimits; + } + if (executor->user.isSome()) { containerConfig.set_user(executor->user.get()); } @@ -9967,6 +9978,100 @@ void Slave::initializeResourceProviderManager( } +google::protobuf::Map Slave::computeExecutorLimits( +const Resources& executorResources, +const vector& tasks) const +{ + Option executorCpuLimit, executorMemLimit; + Value::Scalar cpuRequest, memRequest; + foreach (const TaskInfo& task, tasks) { +// Count the task's CPU limit into the executor's CPU limit. +if (task.limits().count("cpus")) { + setLimit(executorCpuLimit, task.limits().at("cpus")); +} else { + Option taskCpus = +Resources(task.resources()).get("cpus"); + + if (taskCpus.isSome()) { +cpuRequest += taskCpus.get(); + } +} + +// Count the task's memory limit into the executor's memory limit. +if (task.limits().count("mem")) { + setLimit(executorMemLimit, task.limits().at("mem")); +} else { + Option taskMem = +Resources(task.resources()).get("mem"); + + if (taskMem.isSome()) { +memRequest += taskMem.get(); + } +} + } + + if (executorCpuLimit.isSome()) { +// Count the executor's CPU request into its CPU limit as well, this is to +// ensure the executor's CPU limit is always greater than its CPU request. 
+Option executorCpus = + executorResources.get("cpus"); + +if (executorCpus.isSome()) { + setLimit(executorCpuLimit, executorCpus.get()); +} + +// For the tasks which do not have CPU limit, count their CPU requests +// into the executor's CPU limit as well, this is also to ensure the +// executor's CPU limit is always greater than its CPU request. Please +// note that if the flag `cgroups_enable_cfs` is not enabled, we should +// not set the executor's CPU limit, otherwise the tasks which do not +// have CPU limit will be throttled implicitly by the executor's CPU limit. +if (cpuRequest.value() > 0) { +#ifdef __linux__ + if (flags.cgroups_enable_cfs) { +setLimit(executorCpuLimit, cpuRequest); + } else { +executorCpuLimit = None(); + } +#else + setLimit(executorCpuLimit, cpuRequest); +#endif // __linux__ +} + } + + if (executorMemLimit.isSome()) { +// Count the executor's memory request into its memory limit as well, +// this is to ensure the executor's memory limit is always greater +// than its memory request. +Option executorMem = + executorResources.get("mem"); + +if (executorMem.isSome()) { + setLimit(executorMemLimit, executorMem.get()); +} + +// For the tasks which do not have memory limit, count their memory +// requests into the executor's memory limit as well, this is also +// to ensure the executor's memory limit is always greater than its +// memor