Updated health check to use the new CommandInfo. Review: https://reviews.apache.org/r/24632
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/afc65bf0 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/afc65bf0 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/afc65bf0 Branch: refs/heads/master Commit: afc65bf004d342e9ef122b614a15ff643b417748 Parents: 2d4eb0e Author: Jie Yu <[email protected]> Authored: Tue Aug 12 19:17:29 2014 -0700 Committer: Jie Yu <[email protected]> Committed: Tue Aug 12 23:28:29 2014 -0700 ---------------------------------------------------------------------- src/health-check/main.cpp | 137 ++++++++++++++++++++++------------ src/tests/health_check_tests.cpp | 103 ++++++++++++++++++++++--- 2 files changed, 182 insertions(+), 58 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/afc65bf0/src/health-check/main.cpp ---------------------------------------------------------------------- diff --git a/src/health-check/main.cpp b/src/health-check/main.cpp index 472bffc..6849947 100644 --- a/src/health-check/main.cpp +++ b/src/health-check/main.cpp @@ -18,28 +18,31 @@ #include <signal.h> #include <stdio.h> -#include <iostream> -#include <string> #include <string.h> #include <unistd.h> +#include <iostream> +#include <string> +#include <vector> + #include <mesos/mesos.hpp> -#include <process/pid.hpp> #include <process/defer.hpp> #include <process/delay.hpp> #include <process/future.hpp> #include <process/io.hpp> +#include <process/pid.hpp> #include <process/process.hpp> #include <process/protobuf.hpp> #include <process/subprocess.hpp> #include <stout/duration.hpp> -#include <stout/os.hpp> -#include <stout/option.hpp> #include <stout/flags.hpp> -#include <stout/protobuf.hpp> #include <stout/json.hpp> +#include <stout/option.hpp> +#include <stout/os.hpp> +#include <stout/protobuf.hpp> +#include <stout/strings.hpp> #include "common/status_utils.hpp" @@ -50,8 +53,9 @@ using namespace mesos; using std::cout; using std::cerr; using std::endl; -using std::string; using std::map; +using std::string; +using std::vector; using process::UPID; @@ -137,61 +141,96 @@ private: { if (check.has_http()) { promise.fail("HTTP health check is not supported"); - } else if (check.has_command()) { - const CommandInfo& command = check.command(); + return; + } - map<string, string> environment; + if (!check.has_command()) { + promise.fail("No check found in health check"); + return; + } + + const CommandInfo& command = check.command(); + + map<string, string> environment; + foreach (const Environment_Variable& variable, + command.environment().variables()) { + environment[variable.name()] = variable.value(); + } - foreach (const Environment_Variable& variable, - command.environment().variables()) { - environment[variable.name()] = variable.value(); + // Launch the subprocess. + Option<Try<Subprocess> > external; + + if (command.shell()) { + // Use the shell variant. + if (!command.has_value()) { + promise.fail("Shell command is not specified"); + return; + } + + VLOG(2) << "Launching health command '" << command.value() << "'"; + + external = process::subprocess( + command.value(), + Subprocess::PATH("/dev/null"), + Subprocess::FD(STDERR_FILENO), + Subprocess::FD(STDERR_FILENO), + environment); + } else { + // Use the execve variant. + if (!command.has_value()) { + promise.fail("Executable path is not specified"); + return; + } + + vector<string> argv; + foreach (const string& arg, command.argv()) { + argv.push_back(arg); } - VLOG(2) << "Launching health command: " << command.value(); + VLOG(2) << "Launching health command [" << command.value() << ", " + << strings::join(", ", argv) << "]"; - Try<Subprocess> external = - process::subprocess( + external = process::subprocess( command.value(), - // Reading from STDIN instead of PIPE because scripts - // seeing an open STDIN pipe might behave differently - // and we do not expect to pass any value from STDIN - // or PIPE. - Subprocess::FD(STDIN_FILENO), + argv, + Subprocess::PATH("/dev/null"), Subprocess::FD(STDERR_FILENO), Subprocess::FD(STDERR_FILENO), + None(), environment); + } + + CHECK_SOME(external); + + if (external.get().isError()) { + promise.fail("Error creating subprocess for healthcheck"); + return; + } + + Future<Option<int> > status = external.get().get().status(); + status.await(Seconds(check.timeout_seconds())); - if (external.isError()) { - promise.fail("Error creating subprocess for healthcheck"); + if (!status.isReady()) { + string msg = "Command check failed with reason: "; + if (status.isFailed()) { + msg += "failed with error: " + status.failure(); + } else if (status.isDiscarded()) { + msg += "status future discarded"; } else { - Future<Option<int> > status = external.get().status(); - status.await(Seconds(check.timeout_seconds())); - - if (!status.isReady()) { - string msg = "Shell command check failed with reason: "; - if (status.isFailed()) { - msg += "failed with error: " + status.failure(); - } else if (status.isDiscarded()) { - msg += "status future discarded"; - } else { - msg += "status still pending after timeout " + - stringify(Seconds(check.timeout_seconds())); - } - - promise.fail(msg); - return; - } - - int statusCode = status.get().get(); - if (statusCode != 0) { - string message = "Health command check " + WSTRINGIFY(statusCode); - failure(message); - } else { - success(); - } + msg += "status still pending after timeout " + + stringify(Seconds(check.timeout_seconds())); } + + promise.fail(msg); + return; + } + + int statusCode = status.get().get(); + if (statusCode != 0) { + string message = "Health command check " + WSTRINGIFY(statusCode); + failure(message); } else { - promise.fail("No check found in health check"); + success(); } } http://git-wip-us.apache.org/repos/asf/mesos/blob/afc65bf0/src/tests/health_check_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/health_check_tests.cpp b/src/tests/health_check_tests.cpp index 731d944..64fbf62 100644 --- a/src/tests/health_check_tests.cpp +++ b/src/tests/health_check_tests.cpp @@ -60,12 +60,32 @@ class HealthCheckTest : public MesosTest { public: vector<TaskInfo> populateTasks( - const string& cmd, - const string& healthCmd, - const Offer& offer, - const int gracePeriodSeconds = 0, - const Option<int>& consecutiveFailures = None(), - const Option<map<string, string> >& env = None()) + const string& cmd, + const string& healthCmd, + const Offer& offer, + int gracePeriodSeconds = 0, + const Option<int>& consecutiveFailures = None(), + const Option<map<string, string> >& env = None()) + { + CommandInfo healthCommand; + healthCommand.set_value(healthCmd); + + return populateTasks( + cmd, + healthCommand, + offer, + gracePeriodSeconds, + consecutiveFailures, + env); + } + + vector<TaskInfo> populateTasks( + const string& cmd, + CommandInfo healthCommand, + const Offer& offer, + int gracePeriodSeconds = 0, + const Option<int>& consecutiveFailures = None(), + const Option<map<string, string> >& env = None()) { TaskInfo task; task.set_name(""); @@ -75,8 +95,10 @@ public: CommandInfo command; command.set_value(cmd); + Environment::Variable* variable = command.mutable_environment()->add_variables(); + // We need to set the correct directory to launch health check process // instead of the default for tests. variable->set_name("MESOS_LAUNCHER_DIR"); @@ -86,9 +108,6 @@ public: HealthCheck healthCheck; - CommandInfo healthCommand; - healthCommand.set_value(healthCmd); - if (env.isSome()) { foreachpair (const string& name, const string value, env.get()) { Environment::Variable* variable = @@ -175,6 +194,71 @@ TEST_F(HealthCheckTest, HealthyTask) Shutdown(); } + +// Same as above, but use the non-shell version of the health command. +TEST_F(HealthCheckTest, HealthyTaskNonShell) +{ + Try<PID<Master> > master = StartMaster(); + ASSERT_SOME(master); + + slave::Flags flags = CreateSlaveFlags(); + flags.isolation = "posix/cpu,posix/mem"; + + Try<MesosContainerizer*> containerizer = + MesosContainerizer::create(flags, false); + CHECK_SOME(containerizer); + + Try<PID<Slave> > slave = StartSlave(containerizer.get()); + ASSERT_SOME(slave); + + MockScheduler sched; + MesosSchedulerDriver driver( + &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); + + EXPECT_CALL(sched, registered(&driver, _, _)) + .Times(1); + + Future<vector<Offer> > offers; + EXPECT_CALL(sched, resourceOffers(&driver, _)) + .WillOnce(FutureArg<1>(&offers)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + driver.start(); + + AWAIT_READY(offers); + EXPECT_NE(0u, offers.get().size()); + + CommandInfo command; + command.set_shell(false); + command.set_value("/bin/true"); + command.add_argv("true"); + + vector<TaskInfo> tasks = + populateTasks("sleep 120", command, offers.get()[0]); + + Future<TaskStatus> statusRunning; + Future<TaskStatus> statusHealth; + + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .WillOnce(FutureArg<1>(&statusRunning)) + .WillOnce(FutureArg<1>(&statusHealth)); + + driver.launchTasks(offers.get()[0].id(), tasks); + + AWAIT_READY(statusRunning); + EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); + + AWAIT_READY(statusHealth); + EXPECT_EQ(TASK_RUNNING, statusHealth.get().state()); + EXPECT_TRUE(statusHealth.get().healthy()); + + driver.stop(); + driver.join(); + + Shutdown(); +} + + // Testing health status change reporting to scheduler. TEST_F(HealthCheckTest, HealthStatusChange) { @@ -263,6 +347,7 @@ TEST_F(HealthCheckTest, HealthStatusChange) Shutdown(); } + // Testing killing task after number of consecutive failures. // Temporarily disabled due to MESOS-1613. TEST_F(HealthCheckTest, DISABLED_ConsecutiveFailures)
