[
https://issues.apache.org/jira/browse/HDDS-13956?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HDDS-13956:
----------------------------------
Labels: pull-request-available (was: )
> Ozone Recon - Reliability and data integrity improvement in code
> ----------------------------------------------------------------
>
> Key: HDDS-13956
> URL: https://issues.apache.org/jira/browse/HDDS-13956
> Project: Apache Ozone
> Issue Type: Task
> Components: Ozone Recon
> Affects Versions: 2.0.0
> Reporter: Devesh Kumar Singh
> Assignee: Devesh Kumar Singh
> Priority: Major
> Labels: pull-request-available
>
> *Location:* ReconTaskControllerImpl.java
>
>
> {code:java}
> public synchronized void stop() {
> LOG.info("Stopping Recon Task Controller.");
> if (this.executorService != null) {
> this.executorService.shutdownNow(); // No awaitTermination
> }
> if (this.eventProcessingExecutor != null) {
> this.eventProcessingExecutor.shutdownNow(); // No awaitTermination
> }
> }
> {code}
>
>
> *Impact:* *Service reliability, data integrity* *Likelihood:* *High (every
> service shutdown)*
>
> *Java 8 Compatible Fix:*
>
>
> {code:java}
> private static final int SHUTDOWN_TIMEOUT_SECONDS = 30;
>
> public synchronized void stop() {
> LOG.info("Stopping Recon Task Controller.");
> shutdownExecutorGracefully(this.executorService, "main task executor");
> shutdownExecutorGracefully(this.eventProcessingExecutor, "event
> processing executor");
> }
>
> {code}
>
>
> {code:java}
> private void shutdownExecutorGracefully(ExecutorService executor, String
> name) {
> if (executor == null) return;
>
> executor.shutdown();
> try {
> if (!executor.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS,
> TimeUnit.SECONDS)) {
> LOG.warn("Executor {} did not terminate within {} seconds,
> forcing shutdown",
> name, SHUTDOWN_TIMEOUT_SECONDS);
> executor.shutdownNow();
> if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
> LOG.error("Executor {} did not terminate after forced
> shutdown", name);
> }
> }
> } catch (InterruptedException e) {
> LOG.warn("Interrupted while waiting for {} to terminate", name);
> executor.shutdownNow();
> Thread.currentThread().interrupt();
> }
> }
> {code}
>
> *Location:* OzoneManagerServiceProviderImpl.java:
>
> {code:java}
> scheduler.shutdownNow(); // No awaitTermination
>
> {code}
>
> *Risk:* Scheduler threads may not terminate, causing resource leaks and
> preventing JVM shutdown *Impact:* *Resource exhaustion, service restart
> failures*
>
> *Fix:*
>
> {code:java}
> private void stopSyncDataFromOMThread() {
> scheduler.shutdown();
> try {
> if (!scheduler.awaitTermination(30, TimeUnit.SECONDS)) {
> scheduler.shutdownNow();
> if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) {
> LOG.error("OM sync scheduler failed to terminate");
> }
> }
> } catch (InterruptedException e) {
> scheduler.shutdownNow();
> Thread.currentThread().interrupt();
> }
> tarExtractor.stop();
> LOG.debug("Shutdown the OM DB sync scheduler.");
> }
> {code}
>
>
>
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]