Fixed the agent recovery crash if metadata is missing. This is a case that was missed when handling orphan container cleanup. When the agent metadata does not exist but the container pid is checkpointed under the container runtime dir, the container should be regarded as an orphan and cleaned up.
Review: https://reviews.apache.org/r/66539 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/08040790 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/08040790 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/08040790 Branch: refs/heads/1.5.x Commit: 08040790ea2fbf1836db57292f96cb4524faabee Parents: 7735028 Author: Gilbert Song <[email protected]> Authored: Tue Apr 10 11:35:03 2018 -0700 Committer: Gilbert Song <[email protected]> Committed: Tue Apr 17 10:56:16 2018 -0700 ---------------------------------------------------------------------- src/slave/containerizer/mesos/containerizer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/08040790/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index cddc617..c0fe435 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -855,10 +855,11 @@ Future<Nothing> MesosContainerizerProcess::recover( containerizer::paths::isStandaloneContainer( flags.runtime_dir, containerId); + const ContainerID& rootContainerId = + protobuf::getRootContainerId(containerId); + Option<string> directory; if (containerId.has_parent()) { - const ContainerID& rootContainerId = - protobuf::getRootContainerId(containerId); CHECK(containers_.contains(rootContainerId)); if (containers_[rootContainerId]->directory.isSome()) { @@ -905,7 +906,8 @@ Future<Nothing> MesosContainerizerProcess::recover( // elsewhere. 
const bool isRecoverableNestedContainer = containerId.has_parent() && - (containers_.contains(protobuf::getRootContainerId(containerId))) && + containers_.contains(rootContainerId) && + !orphans.contains(rootContainerId) && pid.isSome() && !containerizer::paths::getContainerForceDestroyOnRecovery( flags.runtime_dir, containerId);
