This is an automated email from the ASF dual-hosted git repository. dlych pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit eaf4ef23a8a6b31707e3507d4b00e071722b0faf
Author: Murtadha Hubail <[email protected]>
AuthorDate: Mon May 16 11:38:30 2022 +0300

    [NO ISSUE][FAIL] Halt on unexpected exceptions during active recovery

    - user model changes: no
    - storage format changes: no
    - interface changes: no

    Details:

    - In case of unexpected exceptions during active recovery, the recovery attempts will stop forever. To avoid that, we halt and recovery will be resumed on the next restart.

    Change-Id: Iddbac304981c520cac7aadebcd738b038554246e
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16363
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
    Reviewed-by: Michael Blow <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../java/org/apache/asterix/app/active/RecoveryTask.java | 16 +++++++++++++++-
 .../src/main/java/org/apache/hyracks/util/ExitUtil.java | 1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
index 3bc1c2830b..34a54d1df8 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.app.active;
 
+import static org.apache.hyracks.util.ExitUtil.EC_ACTIVE_RECOVERY_FAILURE;
+
 import java.util.concurrent.Callable;
 
 import org.apache.asterix.active.ActivityState;
@@ -33,6 +35,7 @@ import org.apache.asterix.metadata.declared.MetadataProvider;
 import org.apache.asterix.metadata.entities.Dataset;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.ExitUtil;
 import org.apache.hyracks.util.IRetryPolicy;
 import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
@@ -63,7 +66,18 @@ public class RecoveryTask {
         IRetryPolicy policy = retryPolicyFactory.create(listener);
         return () -> {
             Thread.currentThread().setName("RecoveryTask (" + listener.getEntityId() + ")");
-            doRecover(policy);
+            try {
+                doRecover(policy);
+            } catch (InterruptedException e) {
+                LOGGER.warn("recovery task interrupted", e);
+                Thread.currentThread().interrupt();
+                throw e;
+            } catch (Throwable t) {
+                // in case of any unexpected exception during recovery, the recovery attempts will stop forever.
+                // we halt to ensure recovery attempts are resumed after the restart
+                LOGGER.fatal("unexpected exception during recovery; halting...", t);
+                ExitUtil.halt(EC_ACTIVE_RECOVERY_FAILURE);
+            }
             return null;
         };
     }
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
index beabb5d28c..8f8e8f6228 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
@@ -55,6 +55,7 @@ public class ExitUtil {
     public static final int EC_ACTIVE_SUSPEND_FAILURE = 17;
     public static final int EC_ACTIVE_RESUME_FAILURE = 18;
     public static final int EC_NC_FAILED_TO_NOTIFY_TASKS_COMPLETED = 19;
+    public static final int EC_ACTIVE_RECOVERY_FAILURE = 20;
     public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22;
     public static final int EC_INCONSISTENT_STORAGE_REFERENCES = 23;
     public static final int EC_IMMEDIATE_HALT = 33;
