Fix docker recovery when launched in container and add more logging. Review: https://reviews.apache.org/r/29341
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a76bed6e Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a76bed6e Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a76bed6e Branch: refs/heads/master Commit: a76bed6ecd6eb01755cc5ef5a5a693b61ca2ef86 Parents: 499035a Author: Timothy Chen <[email protected]> Authored: Fri Apr 10 16:02:40 2015 -0700 Committer: Timothy Chen <[email protected]> Committed: Sun May 24 22:27:40 2015 -0700 ---------------------------------------------------------------------- src/slave/containerizer/docker.cpp | 56 ++++++++++++++++++++++++--------- src/slave/containerizer/docker.hpp | 2 +- 2 files changed, 43 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/a76bed6e/src/slave/containerizer/docker.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 66e3746..5bce2ce 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -109,15 +109,10 @@ Option<ContainerID> parse(const Docker::Container& container) } vector<string> parts = strings::split(name.get(), DOCKER_NAME_SEPERATOR); - if (parts.size() == 2) { + if (parts.size() == 2 || parts.size() == 3) { ContainerID id; id.set_value(parts[1]); return id; - } else if (parts.size() == 3) { - // We found a executor or log container. - ContainerID id; - id.set_value(parts[2]); - return id; } } @@ -131,6 +126,8 @@ Try<pid_t> launchWaitProcess(const string& docker, const string& name) { string command = "exit `" + docker + " wait " + name + "`"; + VLOG(1) << "Launching wait process: " << command; + Try<Subprocess> wait = subprocess( command, Subprocess::PATH("/dev/null"), @@ -466,6 +463,7 @@ Future<Nothing> DockerContainerizerProcess::recover( const Option<SlaveState>& state) { LOG(INFO) << "Recovering Docker containers"; + if (state.isSome()) { // Get the list of all Docker containers (running and exited) in // order to remove any orphans and reconcile checkpointed executors. @@ -774,6 +772,11 @@ Future<pid_t> DockerContainerizerProcess::___launch( environment[variable.name()] = variable.value(); } + // Pass GLOG flag to the executor. + if (os::hasenv("GLOG_v")) { + environment["GLOG_v"] = os::getenv("GLOG_v"); + } + string command = "mesos-docker-executor --docker=" + flags.docker + " --container=" + container->name(); @@ -854,6 +857,11 @@ Future<Nothing> DockerContainerizerProcess::___launchInContainer( environment[variable.name()] = variable.value(); } + // Pass GLOG flag to the executor. + if (os::hasenv("GLOG_v")) { + environment["GLOG_v"] = os::getenv("GLOG_v"); + } + // We are launching a mesos-docker-executor in a docker container so // that the containerizer can recover the executor container, as we // are assuming this instance is launched in a docker container and @@ -863,10 +871,17 @@ Future<Nothing> DockerContainerizerProcess::___launchInContainer( // Mounting in the docker socket so the executor can communicate to // the host docker daemon. We are assuming the current instance is // launching docker containers to the host daemon as well. - Volume* volume = containerInfo.add_volumes(); - volume->set_host_path(flags.docker_socket); - volume->set_container_path(flags.docker_socket); - volume->set_mode(Volume::RO); + Volume* dockerSockVolume = containerInfo.add_volumes(); + dockerSockVolume->set_host_path(flags.docker_socket); + dockerSockVolume->set_container_path(flags.docker_socket); + dockerSockVolume->set_mode(Volume::RO); + + // Mounting in sandbox so the logs from the executor can be + // persisted over container failures. + Volume* sandboxVolume = containerInfo.add_volumes(); + sandboxVolume->set_host_path(container->directory); + sandboxVolume->set_container_path(container->directory); + sandboxVolume->set_mode(Volume::RW); ContainerInfo::DockerInfo dockerInfo; @@ -1461,7 +1476,7 @@ void DockerContainerizerProcess::destroy( // been configured to launch but we might have recovered containers // on previous slave run that has configured to launch executor in // docker. - docker->stop(container->executorName(), Seconds(0), true); + docker->stop(container->executorName(), Seconds(0)); container->state = Container::DESTROYING; @@ -1538,7 +1553,12 @@ void DockerContainerizerProcess::__destroy( containers_.erase(containerId); - delay(flags.docker_remove_delay, self(), &Self::remove, container->name()); + delay( + flags.docker_remove_delay, + self(), + &Self::remove, + container->name(), + container->executorName()); delete container; @@ -1576,7 +1596,12 @@ void DockerContainerizerProcess::___destroy( containers_.erase(containerId); - delay(flags.docker_remove_delay, self(), &Self::remove, container->name()); + delay( + flags.docker_remove_delay, + self(), + &Self::remove, + container->name(), + container->executorName()); delete container; } @@ -1601,9 +1626,12 @@ void DockerContainerizerProcess::reaped(const ContainerID& containerId) } -void DockerContainerizerProcess::remove(const string& container) +void DockerContainerizerProcess::remove( + const string& container, + const string& executor) { docker->rm(container, true); + docker->rm(executor, true); } http://git-wip-us.apache.org/repos/asf/mesos/blob/a76bed6e/src/slave/containerizer/docker.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp index e683ea4..0bfb0e9 100644 --- a/src/slave/containerizer/docker.hpp +++ b/src/slave/containerizer/docker.hpp @@ -271,7 +271,7 @@ private: void reaped(const ContainerID& containerId); // Removes the docker container. - void remove(const std::string& container); + void remove(const std::string& container, const std::string& executor); const Flags flags;
