Fixed the agent recovery crash if metadata is missing. This case was missed when handling orphan container cleanup. When the agent metadata does not exist but the container pid is checkpointed under the container runtime dir, the container should be regarded as an orphan and should be cleaned up.
Review: https://reviews.apache.org/r/66539 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f80b0d0b Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f80b0d0b Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f80b0d0b Branch: refs/heads/master Commit: f80b0d0b863acbb0681e2f8fc063c226686b45a0 Parents: 2bfcbcc Author: Gilbert Song <[email protected]> Authored: Tue Apr 10 11:35:03 2018 -0700 Committer: Gilbert Song <[email protected]> Committed: Tue Apr 17 10:49:30 2018 -0700 ---------------------------------------------------------------------- src/slave/containerizer/mesos/containerizer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/f80b0d0b/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index d1d4c2a..6568126 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -856,10 +856,11 @@ Future<Nothing> MesosContainerizerProcess::recover( containerizer::paths::isStandaloneContainer( flags.runtime_dir, containerId); + const ContainerID& rootContainerId = + protobuf::getRootContainerId(containerId); + Option<string> directory; if (containerId.has_parent()) { - const ContainerID& rootContainerId = - protobuf::getRootContainerId(containerId); CHECK(containers_.contains(rootContainerId)); if (containers_[rootContainerId]->directory.isSome()) { @@ -906,7 +907,8 @@ Future<Nothing> MesosContainerizerProcess::recover( // elsewhere. 
const bool isRecoverableNestedContainer = containerId.has_parent() && - (containers_.contains(protobuf::getRootContainerId(containerId))) && + containers_.contains(rootContainerId) && + !orphans.contains(rootContainerId) && pid.isSome() && !containerizer::paths::getContainerForceDestroyOnRecovery( flags.runtime_dir, containerId);
