This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit eaf4ef23a8a6b31707e3507d4b00e071722b0faf
Author: Murtadha Hubail <[email protected]>
AuthorDate: Mon May 16 11:38:30 2022 +0300

    [NO ISSUE][FAIL] Halt on unexpected exceptions during active recovery
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    
    - If an unexpected exception occurs during active recovery, recovery
      attempts stop permanently. To avoid that, we halt the process so
      that recovery is resumed on the next restart.
    
    Change-Id: Iddbac304981c520cac7aadebcd738b038554246e
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16363
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
    Reviewed-by: Michael Blow <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../java/org/apache/asterix/app/active/RecoveryTask.java | 16 +++++++++++++++-
 .../src/main/java/org/apache/hyracks/util/ExitUtil.java  |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
index 3bc1c2830b..34a54d1df8 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/RecoveryTask.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.app.active;
 
+import static org.apache.hyracks.util.ExitUtil.EC_ACTIVE_RECOVERY_FAILURE;
+
 import java.util.concurrent.Callable;
 
 import org.apache.asterix.active.ActivityState;
@@ -33,6 +35,7 @@ import org.apache.asterix.metadata.declared.MetadataProvider;
 import org.apache.asterix.metadata.entities.Dataset;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.ExitUtil;
 import org.apache.hyracks.util.IRetryPolicy;
 import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
@@ -63,7 +66,18 @@ public class RecoveryTask {
         IRetryPolicy policy = retryPolicyFactory.create(listener);
         return () -> {
             Thread.currentThread().setName("RecoveryTask (" + listener.getEntityId() + ")");
-            doRecover(policy);
+            try {
+                doRecover(policy);
+            } catch (InterruptedException e) {
+                LOGGER.warn("recovery task interrupted", e);
+                Thread.currentThread().interrupt();
+                throw e;
+            } catch (Throwable t) {
+                // in case of any unexpected exception during recovery, the recovery attempts will stop forever.
+                // we halt to ensure recovery attempts are resumed after the restart
+                LOGGER.fatal("unexpected exception during recovery; halting...", t);
+                ExitUtil.halt(EC_ACTIVE_RECOVERY_FAILURE);
+            }
             return null;
         };
     }
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
index beabb5d28c..8f8e8f6228 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
@@ -55,6 +55,7 @@ public class ExitUtil {
     public static final int EC_ACTIVE_SUSPEND_FAILURE = 17;
     public static final int EC_ACTIVE_RESUME_FAILURE = 18;
     public static final int EC_NC_FAILED_TO_NOTIFY_TASKS_COMPLETED = 19;
+    public static final int EC_ACTIVE_RECOVERY_FAILURE = 20;
     public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22;
     public static final int EC_INCONSISTENT_STORAGE_REFERENCES = 23;
     public static final int EC_IMMEDIATE_HALT = 33;

Reply via email to