This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new bd7208d  [NO ISSUE][CLUS] Interrupt global recovery on node failure
bd7208d is described below

commit bd7208d093c95a1f4ff906ebaedddd7086cc58e8
Author: Murtadha Hubail <[email protected]>
AuthorDate: Wed Nov 10 22:29:34 2021 +0300

    [NO ISSUE][CLUS] Interrupt global recovery on node failure
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    
    - When a node fails while global recovery is ongoing, interrupt
      recovery to avoid unnecessary waiting.
    
    Change-Id: I58852e046ff4021f4c5d115f5c3488b249fc61a2
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/14025
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Murtadha Hubail <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
---
 .../asterix/hyracks/bootstrap/GlobalRecoveryManager.java    | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
index 9438b16..e6ef8df 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.asterix.app.message.StorageCleanupRequestMessage;
@@ -64,6 +65,7 @@ public class GlobalRecoveryManager implements IGlobalRecoveryManager {
     protected final IHyracksClientConnection hcc;
     protected volatile boolean recoveryCompleted;
     protected volatile boolean recovering;
+    protected Future<?> recoveryFuture;
 
     public GlobalRecoveryManager(ICCServiceContext serviceCtx, 
IHyracksClientConnection hcc,
             IStorageComponentProvider componentProvider) {
@@ -98,7 +100,7 @@ public class GlobalRecoveryManager implements IGlobalRecoveryManager {
                      * Perform recovery on a different thread to avoid 
deadlocks in
                      * {@link 
org.apache.asterix.common.cluster.IClusterStateManager}
                      */
-                    serviceCtx.getControllerService().getExecutor().submit(() 
-> {
+                    recoveryFuture = 
serviceCtx.getControllerService().getExecutor().submit(() -> {
                         try {
                             recover(appCtx);
                         } catch (Throwable e) {
@@ -127,6 +129,9 @@ public class GlobalRecoveryManager implements IGlobalRecoveryManager {
         MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
         recoveryCompleted = true;
         recovering = false;
+        synchronized (this) {
+            recoveryFuture = null;
+        }
         LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
         appCtx.getClusterStateManager().refreshState();
     }
@@ -166,6 +171,12 @@ public class GlobalRecoveryManager implements IGlobalRecoveryManager {
 
     @Override
     public void notifyStateChange(ClusterState newState) {
+        synchronized (this) {
+            if (recovering && newState == ClusterState.UNUSABLE && 
recoveryFuture != null) {
+                // interrupt the recovery attempt since cluster became 
unusable during global recovery
+                recoveryFuture.cancel(true);
+            }
+        }
         if (newState != ClusterState.ACTIVE && newState != 
ClusterState.RECOVERING) {
             recoveryCompleted = false;
         }

Reply via email to