Repository: aurora Updated Branches: refs/heads/master 8383ed511 -> f9a47913a
Close AsyncHttpClient on scheduler shutdown. Convert SlaManager into an AbstractIdleService and explicitly close the AsyncHttpClient on scheduler shutdown. Otherwise we run the risk of having a stuck scheduler JVM that is unable to shut down due to any of the remaining non-daemon http client threads. Testing Done: ./gradlew test **Tested in vagrant:** Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.456 [BlockingDriverJoin, StateMachine] SchedulerLifecycle state machine transition DEAD -> DEAD Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.457 [BlockingDriverJoin, SchedulerLifecycle] Shutdown already invoked, ignoring extra call. Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.458 [TearDownShutdownRegistry STOPPING, StateMachine] storage state machine transition READY -> STOPPED Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.459 [TearDownShutdownRegistry STOPPING, Lifecycle] Shutting down application Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.459 [TearDownShutdownRegistry STOPPING, ShutdownRegistry$ShutdownRegistryImpl] Action controller has already completed, subsequent calls ignored. Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.461 [main, SchedulerMain] Stopping scheduler services. **Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.470 [SlaManager$$EnhancerByGuice$$40d3047 STOPPING, SlaManager] Shutting down SlaManager async http client.** Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.475 [CronLifecycle STOPPING, CronLifecycle] Shutting down Quartz cron scheduler. ... Jun 15 20:48:56 aurora aurora-scheduler[8719]: I0615 20:48:56.167 [main, SchedulerMain] Application run() exited. 
Bugs closed: AURORA-1990 Reviewed at https://reviews.apache.org/r/67613/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/f9a47913 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/f9a47913 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/f9a47913 Branch: refs/heads/master Commit: f9a47913a616efc86eb08e539258a19bf8e9e0b0 Parents: 8383ed5 Author: Santhosh Kumar Shanmugham <[email protected]> Authored: Fri Jun 15 15:08:22 2018 -0700 Committer: Santhosh Kumar <[email protected]> Committed: Fri Jun 15 15:08:22 2018 -0700 ---------------------------------------------------------------------- .../aurora/scheduler/events/WebhookModule.java | 1 + .../org/apache/aurora/scheduler/sla/SlaManager.java | 15 ++++++++++++++- .../org/apache/aurora/scheduler/sla/SlaModule.java | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/f9a47913/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java b/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java index 5ad1251..ac3884c 100644 --- a/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java +++ b/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java @@ -80,6 +80,7 @@ public class WebhookModule extends AbstractModule { if (webhookConfig.isPresent()) { WebhookInfo webhookInfo = parseWebhookConfig(webhookConfig.get()); DefaultAsyncHttpClientConfig config = new DefaultAsyncHttpClientConfig.Builder() + .setThreadPoolName("WebHook-AsyncHttpClient") .setConnectTimeout(webhookInfo.getConnectonTimeoutMsec()) .setHandshakeTimeout(webhookInfo.getConnectonTimeoutMsec()) .setSslSessionTimeout(webhookInfo.getConnectonTimeoutMsec()) 
http://git-wip-us.apache.org/repos/asf/aurora/blob/f9a47913/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java b/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java index 98bec48..9c5caf4 100644 --- a/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java +++ b/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java @@ -30,6 +30,7 @@ import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.AbstractIdleService; import com.google.common.util.concurrent.Striped; import com.google.gson.Gson; import com.google.inject.Inject; @@ -71,8 +72,9 @@ import static java.util.Objects.requireNonNull; * Provides methods for performing SLA-safe work. It is used for maintenance and job update * operations to guarantee that a job's SLA requirements are always satisfied. 
*/ -public class SlaManager { +public class SlaManager extends AbstractIdleService { private static final Logger LOG = LoggerFactory.getLogger(SlaManager.class); + @VisibleForTesting @Qualifier @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) @@ -434,4 +436,15 @@ public class SlaManager { } return true; } + + @Override + protected void startUp() { + //no-op + } + + @Override + protected void shutDown() throws Exception { + LOG.info("Shutting down SlaManager async http client."); + httpClient.close(); + } } http://git-wip-us.apache.org/repos/asf/aurora/blob/f9a47913/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java b/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java index 07082a9..27bbaa8 100644 --- a/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java +++ b/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java @@ -131,6 +131,7 @@ public class SlaModule extends AbstractModule { SchedulerServicesModule.addSchedulerActiveServiceBinding(binder()).to(SlaUpdater.class); DefaultAsyncHttpClientConfig config = new DefaultAsyncHttpClientConfig.Builder() + .setThreadPoolName("SlaManager-AsyncHttpClient") .setConnectTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue()) .setHandshakeTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue()) .setSslSessionTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue()) @@ -159,6 +160,7 @@ public class SlaModule extends AbstractModule { "SlaManager-%d", LOG)); bind(SlaManager.class).in(javax.inject.Singleton.class); + SchedulerServicesModule.addSchedulerActiveServiceBinding(binder()).to(SlaManager.class); } // TODO(ksweeney): This should use AbstractScheduledService.
