Fixed a race in Cluster shutdown when destroying containers.

Review: https://reviews.apache.org/r/30353/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/977b80ae
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/977b80ae
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/977b80ae

Branch: refs/heads/master
Commit: 977b80ae5e9f818648adc7b9f6d959264a84a9fe
Parents: aa9da62
Author: Benjamin Mahler <[email protected]>
Authored: Tue Jan 27 18:49:38 2015 -0800
Committer: Benjamin Mahler <[email protected]>
Committed: Wed Jan 28 13:50:08 2015 -0800

----------------------------------------------------------------------
 src/tests/cluster.hpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/mesos/blob/977b80ae/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 74cedb3..90fda52 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -435,22 +435,29 @@ inline void Cluster::Slaves::shutdown()
   foreachpair (const process::PID<slave::Slave>& pid,
                const Slave& slave,
                copy) {
-    process::Future<hashset<ContainerID> > containers =
+    // Destroy the existing containers on the slave. Note that some
+    // containers may terminate while we are doing this, so we ignore
+    // any 'wait' failures and ensure that there are no containers
+    // when we're done destroying.
+    process::Future<hashset<ContainerID>> containers =
       slave.containerizer->containers();
     AWAIT_READY(containers);

     foreach (const ContainerID& containerId, containers.get()) {
-      // We need to wait on the container before destroying it in case someone
-      // else has already waited on it (and therefore would be immediately
-      // 'reaped' before we could wait on it).
       process::Future<containerizer::Termination> wait =
         slave.containerizer->wait(containerId);

       slave.containerizer->destroy(containerId);

-      AWAIT_READY(wait);
+      AWAIT(wait);
     }

+    containers = slave.containerizer->containers();
+    AWAIT_READY(containers);
+
+    ASSERT_TRUE(containers.get().empty())
+      << "Failed to destroy containers: " << stringify(containers.get());
+
     stop(pid);
   }
   slaves.clear();
