gianm closed pull request #6516: fix exception in Supervisor.start causing
overlord unable to become leader
URL: https://github.com/apache/incubator-druid/pull/6516
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java
index cfca0de4b60..f2c78a66b8a 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java
@@ -133,7 +133,12 @@ public void start()
for (String id : supervisors.keySet()) {
SupervisorSpec spec = supervisors.get(id);
if (!(spec instanceof NoopSupervisorSpec)) {
- createAndStartSupervisorInternal(spec, false);
+ try {
+ createAndStartSupervisorInternal(spec, false);
+ }
+ catch (Exception ex) {
+ log.error(ex, "Failed to start supervisor: [%s]", spec.getId());
+ }
}
}
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java
index 85cfd95c726..7ae3b994ace 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java
@@ -228,6 +228,26 @@ public void testStartAlreadyStarted()
manager.start();
}
+ @Test
+ public void testStartIndividualSupervisorsFailStart()
+ {
+ Map<String, SupervisorSpec> existingSpecs = ImmutableMap.of(
+ "id1", new TestSupervisorSpec("id1", supervisor1),
+ "id3", new TestSupervisorSpec("id3", supervisor3)
+ );
+
+
+ EasyMock.expect(metadataSupervisorManager.getLatest()).andReturn(existingSpecs);
+ supervisor3.start();
+ supervisor1.start();
+ EasyMock.expectLastCall().andThrow(new RuntimeException("supervisor explosion"));
+ replayAll();
+
+ manager.start();
+
+ // if we get here, we are properly insulated from exploding supervisors
+ }
+
@Test
public void testStopThrowsException()
{
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]