This is an automated email from the ASF dual-hosted git repository.
mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new bd7208d [NO ISSUE][CLUS] Interrupt global recovery on node failure
bd7208d is described below
commit bd7208d093c95a1f4ff906ebaedddd7086cc58e8
Author: Murtadha Hubail <[email protected]>
AuthorDate: Wed Nov 10 22:29:34 2021 +0300
[NO ISSUE][CLUS] Interrupt global recovery on node failure
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- When a node fails while global recovery is ongoing, interrupt the
recovery to avoid unnecessary waiting.
Change-Id: I58852e046ff4021f4c5d115f5c3488b249fc61a2
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/14025
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Murtadha Hubail <[email protected]>
Reviewed-by: Ali Alsuliman <[email protected]>
---
.../asterix/hyracks/bootstrap/GlobalRecoveryManager.java | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
index 9438b16..e6ef8df 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/GlobalRecoveryManager.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.asterix.app.message.StorageCleanupRequestMessage;
@@ -64,6 +65,7 @@ public class GlobalRecoveryManager implements
IGlobalRecoveryManager {
protected final IHyracksClientConnection hcc;
protected volatile boolean recoveryCompleted;
protected volatile boolean recovering;
+ protected Future<?> recoveryFuture;
public GlobalRecoveryManager(ICCServiceContext serviceCtx,
IHyracksClientConnection hcc,
IStorageComponentProvider componentProvider) {
@@ -98,7 +100,7 @@ public class GlobalRecoveryManager implements
IGlobalRecoveryManager {
* Perform recovery on a different thread to avoid
deadlocks in
* {@link
org.apache.asterix.common.cluster.IClusterStateManager}
*/
- serviceCtx.getControllerService().getExecutor().submit(()
-> {
+ recoveryFuture =
serviceCtx.getControllerService().getExecutor().submit(() -> {
try {
recover(appCtx);
} catch (Throwable e) {
@@ -127,6 +129,9 @@ public class GlobalRecoveryManager implements
IGlobalRecoveryManager {
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
recoveryCompleted = true;
recovering = false;
+ synchronized (this) {
+ recoveryFuture = null;
+ }
LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
appCtx.getClusterStateManager().refreshState();
}
@@ -166,6 +171,12 @@ public class GlobalRecoveryManager implements
IGlobalRecoveryManager {
@Override
public void notifyStateChange(ClusterState newState) {
+ synchronized (this) {
+ if (recovering && newState == ClusterState.UNUSABLE &&
recoveryFuture != null) {
+ // interrupt the recovery attempt since cluster became
unusable during global recovery
+ recoveryFuture.cancel(true);
+ }
+ }
if (newState != ClusterState.ACTIVE && newState !=
ClusterState.RECOVERING) {
recoveryCompleted = false;
}