Enabled pause/resume for general checks. Review: https://reviews.apache.org/r/57912/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3a689ab5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3a689ab5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3a689ab5

Branch: refs/heads/master
Commit: 3a689ab552a9ff23dd912e0178d3c5af393f7e84
Parents: 5c9ce37
Author: Gastón Kleiman <[email protected]>
Authored: Thu Mar 30 13:41:14 2017 +0200
Committer: Alexander Rukletsov <[email protected]>
Committed: Thu Mar 30 19:34:24 2017 +0200

----------------------------------------------------------------------
 src/checks/checker.cpp            | 52 ++++++++++++++++++++++++++++++++--
 src/checks/checker.hpp            |  7 ++---
 src/launcher/default_executor.cpp | 22 +++++++++-----
 src/launcher/executor.cpp         |  4 +--
 4 files changed, 69 insertions(+), 16 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.cpp b/src/checks/checker.cpp
index 1664acd..d1e9083 100644
--- a/src/checks/checker.cpp
+++ b/src/checks/checker.cpp
@@ -127,6 +127,9 @@ public:
       const Option<pid_t>& _taskPid,
       const std::vector<std::string>& _namespaces);

+  void pause();
+  void resume();
+
   virtual ~CheckerProcess() {}

 protected:
@@ -167,6 +170,7 @@ private:
   Option<lambda::function<pid_t(const lambda::function<int()>&)>> clone;

   CheckStatusInfo previousCheckStatus;
+  bool paused;
 };


@@ -208,9 +212,15 @@ Checker::~Checker()
 }


-void Checker::stop()
+void Checker::pause()
+{
+  dispatch(process.get(), &CheckerProcess::pause);
+}
+
+
+void Checker::resume()
 {
-  terminate(process.get(), true);
+  dispatch(process.get(), &CheckerProcess::resume);
 }


@@ -225,7 +235,8 @@ CheckerProcess::CheckerProcess(
     updateCallback(_callback),
     taskId(_taskId),
     taskPid(_taskPid),
-    namespaces(_namespaces)
+    namespaces(_namespaces),
+    paused(false)
 {
   Try<Duration> create = Duration::create(check.delay_seconds());
   CHECK_SOME(create);
@@ -286,6 +297,10 @@ void CheckerProcess::finalize()

 void CheckerProcess::performCheck()
 {
+  if (paused) {
+    return;
+  }
+
   Stopwatch stopwatch;
   stopwatch.start();

@@ -314,16 +329,47 @@ void CheckerProcess::performCheck()

 void CheckerProcess::scheduleNext(const Duration& duration)
 {
+  CHECK(!paused);
+
   VLOG(1) << "Scheduling check for task '" << taskId << "' in " << duration;

   delay(duration, self(), &Self::performCheck);
 }


+void CheckerProcess::pause()
+{
+  if (!paused) {
+    VLOG(1) << "Checking for task '" << taskId << "' paused";
+
+    paused = true;
+  }
+}
+
+
+void CheckerProcess::resume()
+{
+  if (paused) {
+    VLOG(1) << "Checking for task '" << taskId << "' resumed";
+
+    paused = false;
+
+    // Schedule a check immediately.
+    scheduleNext(Duration::zero());
+  }
+}
+
 void CheckerProcess::processCheckResult(
     const Stopwatch& stopwatch,
     const CheckStatusInfo& result)
 {
+  // `Checker` might have been paused while performing the check.
+  if (paused) {
+    LOG(INFO) << "Ignoring " << check.type() << " check result for"
+              << " task '" << taskId << "': checking is paused";
+    return;
+  }
+
   VLOG(1) << "Performed " << check.type() << " check"
           << " for task '" << taskId << "' in " << stopwatch.elapsed();


http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.hpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.hpp b/src/checks/checker.hpp
index e8af316..1521b9c 100644
--- a/src/checks/checker.hpp
+++ b/src/checks/checker.hpp
@@ -68,10 +68,9 @@ public:
   Checker(const Checker&) = delete;
   Checker& operator=(const Checker&) = delete;

-  /**
-   * Immediately stops checking. Any in-flight checks are dropped.
-   */
-  void stop();
+  // Idempotent helpers for pausing and resuming checking.
+  void pause();
+  void resume();

 private:
   explicit Checker(process::Owned<CheckerProcess> process);

http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 606fd9c..79785fc 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -163,8 +163,12 @@ public:
       }
     }

-    // Pause all health checks.
+    // Pause all checks and health checks.
     foreachvalue (Owned<Container> container, containers) {
+      if (container->checker.isSome()) {
+        container->checker->get()->pause();
+      }
+
       if (container->healthChecker.isSome()) {
         container->healthChecker->get()->pause();
       }
@@ -193,8 +197,12 @@ public:
       wait(containers.keys());
     }

-    // Resume all health checks.
+    // Resume all checks and health checks.
     foreachvalue (Owned<Container> container, containers) {
+      if (container->checker.isSome()) {
+        container->checker->get()->resume();
+      }
+
       if (container->healthChecker.isSome()) {
         container->healthChecker->get()->resume();
       }
@@ -738,11 +746,11 @@ protected:
       deserialize<agent::Response>(contentType, response->body);
     CHECK_SOME(waitResponse);

-    // If there is an associated checker with the task, stop it to
-    // avoid sending check updates after a terminal status update.
+    // If the task is checked, pause the associated checker to avoid
+    // sending check updates after a terminal status update.
     if (container->checker.isSome()) {
       CHECK_NOTNULL(container->checker->get());
-      container->checker->get()->stop();
+      container->checker->get()->pause();
       container->checker = None();
     }

@@ -931,13 +939,13 @@ protected:
     CHECK(!container->killing);
     container->killing = true;

-    // If the task is checked, stop the associated checker.
+    // If the task is checked, pause the associated checker.
     //
     // TODO(alexr): Once we support `TASK_KILLING` in this executor,
     // consider continuing checking the task after sending `TASK_KILLING`.
     if (container->checker.isSome()) {
       CHECK_NOTNULL(container->checker->get());
-      container->checker->get()->stop();
+      container->checker->get()->pause();
       container->checker = None();
     }


http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 8bd266e..bc69beb 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -731,7 +731,7 @@ private:

     // Stop checking the task.
     if (checker.get() != nullptr) {
-      checker->stop();
+      checker->pause();
     }

     // Stop health checking the task.
@@ -776,7 +776,7 @@ private:

     // Stop checking the task.
     if (checker.get() != nullptr) {
-      checker->stop();
+      checker->pause();
     }

     // Stop health checking the task.
