Repository: aurora
Updated Branches:
  refs/heads/master dfd06771a -> 7a803730c


Added additional stop() to prevent errors in run() to stop shutdown in 
SchedulerMain

Ensure that `SchedulerMain.run()` calls stop in the case of exceptions. This 
prevents the Scheduler from being transitioned to DEAD state, but not actually 
stopping it's services.

See the attached ticket for an example of issue happening.

Testing Done:
Added an additional unit test for prepare() failing in 
`SchedulerLifecycle.java`.

./gradlew test
./build-support/jenkin/build.sh

Bugs closed: AURORA-1950

Reviewed at https://reviews.apache.org/r/62626/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/7a803730
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/7a803730
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/7a803730

Branch: refs/heads/master
Commit: 7a803730c95fc7d1f788292d83c3d2eeb81a936d
Parents: dfd0677
Author: Jordan Ly <[email protected]>
Authored: Thu Sep 28 17:23:17 2017 -0700
Committer: Santhosh Kumar <[email protected]>
Committed: Thu Sep 28 17:23:17 2017 -0700

----------------------------------------------------------------------
 .../aurora/scheduler/app/SchedulerMain.java     | 23 ++++++-------
 .../scheduler/SchedulerLifecycleTest.java       | 34 ++++++++++++++++----
 2 files changed, 40 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/7a803730/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java 
b/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java
index 4b3ba5e..bb7055e 100644
--- a/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java
+++ b/src/main/java/org/apache/aurora/scheduler/app/SchedulerMain.java
@@ -143,16 +143,17 @@ public class SchedulerMain {
   }
 
   void run() {
-    startupServices.startAsync();
-    Runtime.getRuntime().addShutdownHook(new Thread(SchedulerMain.this::stop, 
"ShutdownHook"));
-    startupServices.awaitHealthy();
+    try {
+      startupServices.startAsync();
+      Runtime.getRuntime().addShutdownHook(new 
Thread(SchedulerMain.this::stop, "ShutdownHook"));
+      startupServices.awaitHealthy();
 
-    LeadershipListener leaderListener = schedulerLifecycle.prepare();
+      LeadershipListener leaderListener = schedulerLifecycle.prepare();
+
+      HostAndPort httpAddress = httpService.getAddress();
+      InetSocketAddress httpSocketAddress =
+          InetSocketAddress.createUnresolved(httpAddress.getHost(), 
httpAddress.getPort());
 
-    HostAndPort httpAddress = httpService.getAddress();
-    InetSocketAddress httpSocketAddress =
-        InetSocketAddress.createUnresolved(httpAddress.getHost(), 
httpAddress.getPort());
-    try {
       schedulerService.lead(
           httpSocketAddress,
           ImmutableMap.of(SERVERSET_ENDPOINT_NAME.get(), httpSocketAddress),
@@ -161,10 +162,10 @@ public class SchedulerMain {
       throw new IllegalStateException("Failed to lead service.", e);
     } catch (InterruptedException e) {
       throw new IllegalStateException("Interrupted while joining scheduler 
service group.", e);
+    } finally {
+      appLifecycle.awaitShutdown();
+      stop();
     }
-
-    appLifecycle.awaitShutdown();
-    stop();
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/aurora/blob/7a803730/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java 
b/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java
index 70479ef..edd7380 100644
--- a/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/SchedulerLifecycleTest.java
@@ -106,8 +106,14 @@ public class SchedulerLifecycleTest extends EasyMockTest {
     serviceManager.awaitHealthy();
   }
 
-  private void expectShutdown() throws Exception {
+  private void expectLeaderShutdown() throws Exception {
     leaderControl.leave();
+    expectShutdown();
+  }
+
+  private void expectShutdown() throws Exception {
+    // Shutdown before leadership is taken (ex. in prepare).
+
     expect(driver.stopAsync()).andReturn(driver);
     driver.awaitTerminated();
     storageUtil.storage.stop();
@@ -128,7 +134,7 @@ public class SchedulerLifecycleTest extends EasyMockTest {
     expectInitializeDriver();
 
     expectFullStartup();
-    expectShutdown();
+    expectLeaderShutdown();
 
     replayAndCreateLifecycle();
 
@@ -151,7 +157,7 @@ public class SchedulerLifecycleTest extends EasyMockTest {
     expect(driver.startAsync()).andReturn(driver);
     driver.awaitRunning();
 
-    expectShutdown();
+    expectLeaderShutdown();
 
     replayAndCreateLifecycle();
 
@@ -170,7 +176,7 @@ public class SchedulerLifecycleTest extends EasyMockTest {
     driver.awaitRunning();
 
     // Important piece here is what's absent - leader presence is not 
advertised.
-    expectShutdown();
+    expectLeaderShutdown();
 
     replayAndCreateLifecycle();
 
@@ -180,12 +186,28 @@ public class SchedulerLifecycleTest extends EasyMockTest {
   }
 
   @Test
+  public void testStoragePrepareFails() throws Exception {
+    storageUtil.storage.prepare();
+    expectLastCall().andThrow(new StorageException("Prepare failed."));
+    expectShutdown();
+
+    replayAndCreateLifecycle();
+
+    try {
+      schedulerLifecycle.prepare();
+      fail();
+    } catch (StorageException e) {
+      // Expected.
+    }
+  }
+
+  @Test
   public void testStorageStartFails() throws Exception {
     storageUtil.storage.prepare();
     storageUtil.expectOperations();
     storageUtil.storage.start(EasyMock.anyObject());
     expectLastCall().andThrow(new StorageException("Recovery failed."));
-    expectShutdown();
+    expectLeaderShutdown();
 
     replayAndCreateLifecycle();
 
@@ -209,7 +231,7 @@ public class SchedulerLifecycleTest extends EasyMockTest {
     expectInitializeDriver();
 
     expectFullStartup();
-    expectShutdown();
+    expectLeaderShutdown();
     expect(serviceManager.stopAsync()).andReturn(serviceManager);
     serviceManager.awaitStopped(5, TimeUnit.SECONDS);
 

Reply via email to