Repository: aurora Updated Branches: refs/heads/master dfd06771a -> 7a803730c
Added additional stop() to prevent errors in run() to stop shutdown in SchedulerMain Ensure that `SchedulerMain.run()` calls stop in the case of exceptions. This prevents the Scheduler from being transitioned to DEAD state, but not actually stopping it's services. See the attached ticket for an example of issue happening. Testing Done: Added an additional unit test for prepare() failing in `SchedulerLifecycle.java`. ./gradlew test ./build-support/jenkin/build.sh Bugs closed: AURORA-1950 Reviewed at https://reviews.apache.org/r/62626/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/7a803730 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/7a803730 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/7a803730 Branch: refs/heads/master Commit: 7a803730c95fc7d1f788292d83c3d2eeb81a936d Parents: dfd0677 Author: Jordan Ly <[email protected]> Authored: Thu Sep 28 17:23:17 2017 -0700 Committer: Santhosh Kumar <[email protected]> Committed: Thu Sep 28 17:23:17 2017 -0700 ---------------------------------------------------------------------- .../aurora/scheduler/app/SchedulerMain.java | 23 ++++++------- .../scheduler/SchedulerLifecycleTest.java | 34 ++++++++++++++++---- 2 files changed, 40 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/7a803730/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java b/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java index 4b3ba5e..bb7055e 100644 --- a/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java +++ b/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java @@ -143,16 +143,17 @@ public class SchedulerMain { } void run() { - startupServices.startAsync(); - Runtime.getRuntime().addShutdownHook(new Thread(SchedulerMain.this::stop, "ShutdownHook")); - startupServices.awaitHealthy(); + try { + startupServices.startAsync(); + Runtime.getRuntime().addShutdownHook(new Thread(SchedulerMain.this::stop, "ShutdownHook")); + startupServices.awaitHealthy(); - LeadershipListener leaderListener = schedulerLifecycle.prepare(); + LeadershipListener leaderListener = schedulerLifecycle.prepare(); + + HostAndPort httpAddress = httpService.getAddress(); + InetSocketAddress httpSocketAddress = + InetSocketAddress.createUnresolved(httpAddress.getHost(), httpAddress.getPort()); - HostAndPort httpAddress = httpService.getAddress(); - InetSocketAddress httpSocketAddress = - InetSocketAddress.createUnresolved(httpAddress.getHost(), httpAddress.getPort()); - try { schedulerService.lead( httpSocketAddress, ImmutableMap.of(SERVERSET_ENDPOINT_NAME.get(), httpSocketAddress), @@ -161,10 +162,10 @@ public class SchedulerMain { throw new IllegalStateException("Failed to lead service.", e); } catch (InterruptedException e) { throw new IllegalStateException("Interrupted while joining scheduler service group.", e); + } finally { + appLifecycle.awaitShutdown(); + stop(); } - - appLifecycle.awaitShutdown(); - stop(); } @VisibleForTesting http://git-wip-us.apache.org/repos/asf/aurora/blob/7a803730/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java b/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java index 70479ef..edd7380 100644 --- a/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java +++ b/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java @@ -106,8 +106,14 @@ public class SchedulerLifecycleTest extends EasyMockTest { serviceManager.awaitHealthy(); } - private void expectShutdown() throws Exception { + private void expectLeaderShutdown() throws Exception { leaderControl.leave(); + expectShutdown(); + } + + private void expectShutdown() throws Exception { + // Shutdown before leadership is taken (ex. in prepare). + expect(driver.stopAsync()).andReturn(driver); driver.awaitTerminated(); storageUtil.storage.stop(); @@ -128,7 +134,7 @@ public class SchedulerLifecycleTest extends EasyMockTest { expectInitializeDriver(); expectFullStartup(); - expectShutdown(); + expectLeaderShutdown(); replayAndCreateLifecycle(); @@ -151,7 +157,7 @@ public class SchedulerLifecycleTest extends EasyMockTest { expect(driver.startAsync()).andReturn(driver); driver.awaitRunning(); - expectShutdown(); + expectLeaderShutdown(); replayAndCreateLifecycle(); @@ -170,7 +176,7 @@ public class SchedulerLifecycleTest extends EasyMockTest { driver.awaitRunning(); // Important piece here is what's absent - leader presence is not advertised. - expectShutdown(); + expectLeaderShutdown(); replayAndCreateLifecycle(); @@ -180,12 +186,28 @@ public class SchedulerLifecycleTest extends EasyMockTest { } @Test + public void testStoragePrepareFails() throws Exception { + storageUtil.storage.prepare(); + expectLastCall().andThrow(new StorageException("Prepare failed.")); + expectShutdown(); + + replayAndCreateLifecycle(); + + try { + schedulerLifecycle.prepare(); + fail(); + } catch (StorageException e) { + // Expected. + } + } + + @Test public void testStorageStartFails() throws Exception { storageUtil.storage.prepare(); storageUtil.expectOperations(); storageUtil.storage.start(EasyMock.anyObject()); expectLastCall().andThrow(new StorageException("Recovery failed.")); - expectShutdown(); + expectLeaderShutdown(); replayAndCreateLifecycle(); @@ -209,7 +231,7 @@ public class SchedulerLifecycleTest extends EasyMockTest { expectInitializeDriver(); expectFullStartup(); - expectShutdown(); + expectLeaderShutdown(); expect(serviceManager.stopAsync()).andReturn(serviceManager); serviceManager.awaitStopped(5, TimeUnit.SECONDS);
