This is an automated email from the ASF dual-hosted git repository. josephwu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit a2d6a81f5dc47967726a6205799825b345231e80 Author: Joseph Wu <[email protected]> AuthorDate: Mon Oct 29 12:07:20 2018 -0700 Fixed LongLivedDefaultExecutorRestart GC test. This test was incorrectly restarting the agent actor inside the test. In the flaky test, the agent actor would be started with an auto-generated PID (i.e. `slave(1)`, `slave(2)`, etc). Because of how this generation works, each PID will be unique. The executor in the test would be launched under `slave(1)` but the restarted agent would have a PID of `slave(2)`. This meant the executor's reregistration would fail with '404 Not Found' and the executor would be cleaned up. The executor cleanup would potentially trigger a TASK_LOST status update; and if that update is sent prior to ending the test, this will break some mock expectations and cause the test to fail. This changes the test to always use the same PID for the agent actor. Review: https://reviews.apache.org/r/69203 --- src/tests/gc_tests.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/tests/gc_tests.cpp b/src/tests/gc_tests.cpp index 4d94430..a583f1d 100644 --- a/src/tests/gc_tests.cpp +++ b/src/tests/gc_tests.cpp @@ -903,8 +903,7 @@ TEST_F(GarbageCollectorIntegrationTest, LongLivedDefaultExecutor) // when a task finishes, but the executor is still running. This version of // the test restarts the agent to ensure recovered tasks are also scheduled // for GC. -TEST_F( - GarbageCollectorIntegrationTest, DISABLED_LongLivedDefaultExecutorRestart) +TEST_F(GarbageCollectorIntegrationTest, LongLivedDefaultExecutorRestart) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); @@ -915,8 +914,14 @@ TEST_F( // Turn on GC of nested container sandboxes by default. flags.gc_non_executor_container_sandboxes = true; + // Start the slave with a static process ID. This allows the executor to + // reconnect with the slave upon a process restart. 
+ const string id(process::ID::generate("agent")); + Owned<MasterDetector> detector = master.get()->createDetector(); - Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); + Try<Owned<cluster::Slave>> slave = + StartSlave(detector.get(), id, flags, false); + ASSERT_SOME(slave); auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); @@ -1094,7 +1099,7 @@ TEST_F( // The agent should reregister once recovery is complete, which also means // that any finished tasks metadata/sandboxes should be rescheduled for GC. - slave = StartSlave(detector.get(), flags); + slave = StartSlave(detector.get(), id, flags, false); ASSERT_SOME(slave); AWAIT_READY(slaveReregisteredMessage);
