hbase git commit: HBASE-20173 [AMv2] DisableTableProcedure concurrent to ServerCrashProcedure can deadlock

stack Tue, 13 Mar 2018 06:04:36 -0700

Repository: hbase
Updated Branches:
  refs/heads/branch-2.0 c998e8d5f -> 655592fc9



HBASE-20173 [AMv2] DisableTableProcedure concurrent to ServerCrashProcedure can 
deadlock

Allow that DisableTableProcedue can grab a region lock before
ServerCrashProcedure can. Cater to this cricumstance where SCP
was not unable to make progress by running the search for RIT
against the crashed server a second time, post creation of all
crashed-server assignemnts. The second run will uncover such as
the above DisableTableProcedure unassign and will interrupt its
suspend allowing both procedures to make progress.

M hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
 Add new procedure step post-assigns that reruns the RIT finder method.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 Make this important log more specific as to what is going on.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 Better explanation as to what is going on.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
 Add extra step and run handleRIT a second time after we've queued up
 all SCP assigns. Also fix a but. SCP was adding an assign of a RIT
 that was actually trying to unassign (made the deadlock more likely).


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/655592fc
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/655592fc
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/655592fc

Branch: refs/heads/branch-2.0
Commit: 655592fc93467cabdbea7f7fcf2c55cd601aafd5
Parents: c998e8d
Author: Michael Stack <[email protected]>
Authored: Sun Mar 11 13:15:16 2018 -0700
Committer: Michael Stack <[email protected]>
Committed: Tue Mar 13 05:58:37 2018 -0700

----------------------------------------------------------------------
 .../hbase/IntegrationTestDDLMasterFailover.java |   6 +-
 .../src/main/protobuf/MasterProcedure.proto     |   1 +
 .../master/assignment/AssignmentManager.java    |   2 +-
 .../master/assignment/UnassignProcedure.java    |  13 +-
 .../master/procedure/RecoverMetaProcedure.java  |   2 +
 .../master/procedure/ServerCrashProcedure.java  | 158 +++++++++++--------
 6 files changed, 103 insertions(+), 79 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDDLMasterFailover.java
----------------------------------------------------------------------
diff --git 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDDLMasterFailover.java
 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDDLMasterFailover.java
index d9a2f94..4d0d7e0 100644
--- 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDDLMasterFailover.java
+++ 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDDLMasterFailover.java
@@ -53,12 +53,12 @@ import org.slf4j.LoggerFactory;
 
 /**
  *
- * Integration test that verifies Procedure V2. <br/><br/>
+ * Integration test that verifies Procedure V2.
  *
  * DDL operations should go through (rollforward or rollback) when primary 
master is killed by
- * ChaosMonkey (default MASTER_KILLING)<br/><br/>
+ * ChaosMonkey (default MASTER_KILLING).
  *
- * Multiple Worker threads are started to randomly do the following Actions in 
loops:<br/>
+ * <p></p>Multiple Worker threads are started to randomly do the following 
Actions in loops:
  * Actions generating and populating tables:
  * <ul>
  *     <li>CreateTableAction</li>

http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto 
b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
index 5b42ca9..8f03a9f 100644
--- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
@@ -301,6 +301,7 @@ enum ServerCrashState {
   // Removed SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7;
   SERVER_CRASH_ASSIGN = 8;
   SERVER_CRASH_WAIT_ON_ASSIGN = 9;
+  SERVER_CRASH_HANDLE_RIT2 = 20;
   SERVER_CRASH_FINISH = 100;
 }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index a48ed75..33a8d21 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1196,7 +1196,7 @@ public class AssignmentManager implements ServerListener {
   private void handleRegionOverStuckWarningThreshold(final RegionInfo 
regionInfo) {
     final RegionStateNode regionNode = 
regionStates.getRegionStateNode(regionInfo);
     //if (regionNode.isStuck()) {
-    LOG.warn("TODO Handle stuck in transition: " + regionNode);
+    LOG.warn("STUCK Region-In-Transition {}", regionNode);
   }
 
   // 
============================================================================================

http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
index 3454d96..03f5213 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
@@ -249,10 +249,10 @@ public class UnassignProcedure extends 
RegionTransitionProcedure {
       final IOException exception) {
     // TODO: Is there on-going rpc to cleanup?
     if (exception instanceof ServerCrashException) {
-      // This exception comes from ServerCrashProcedure after log splitting.
+      // This exception comes from ServerCrashProcedure AFTER log splitting.
       // SCP found this region as a RIT. Its call into here says it is ok to 
let this procedure go
-      // on to a complete close now. This will release lock on this region so 
subsequent action on
-      // region can succeed; e.g. the assign that follows this unassign when a 
move (w/o wait on SCP
+      // complete. This complete will release lock on this region so 
subsequent action on region
+      // can succeed; e.g. the assign that follows this unassign when a move 
(w/o wait on SCP
       // the assign could run w/o logs being split so data loss).
       try {
         reportTransition(env, regionNode, TransitionCode.CLOSED, 
HConstants.NO_SEQNUM);
@@ -263,7 +263,6 @@ public class UnassignProcedure extends 
RegionTransitionProcedure {
     } else if (exception instanceof RegionServerAbortedException ||
         exception instanceof RegionServerStoppedException ||
         exception instanceof ServerNotRunningYetException) {
-      // TODO
       // RS is aborting, we cannot offline the region since the region may 
need to do WAL
       // recovery. Until we see the RS expiration, we should retry.
       // TODO: This should be suspend like the below where we call expire on 
server?
@@ -276,8 +275,10 @@ public class UnassignProcedure extends 
RegionTransitionProcedure {
       LOG.warn("Expiring server " + this + "; " + regionNode.toShortString() +
         ", exception=" + exception);
       
env.getMasterServices().getServerManager().expireServer(regionNode.getRegionLocation());
-      // Return false so this procedure stays in suspended state. It will be 
woken up by a
-      // ServerCrashProcedure when it notices this RIT.
+      // Return false so this procedure stays in suspended state. It will be 
woken up by the
+      // ServerCrashProcedure that was scheduled when we called #expireServer 
above. SCP calls
+      // #handleRIT which will call this method only the exception will be a 
ServerCrashException
+      // this time around (See above).
       // TODO: Add a SCP as a new subprocedure that we now come to depend on.
       return false;
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
index 70d0d55..301cd18 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
@@ -159,6 +159,8 @@ public class RecoverMetaProcedure
    * just as we do over in ServerCrashProcedure#handleRIT except less to do 
here; less context
    * to carry.
    */
+  // NOTE: Make sure any fix or improvement done here is also done in 
SCP#handleRIT; the methods
+  // have overlap.
   private void handleRIT(MasterProcedureEnv env, RegionInfo ri, ServerName 
crashedServerName) {
     AssignmentManager am = env.getAssignmentManager();
     RegionTransitionProcedure rtp = 
am.getRegionStates().getRegionTransitionProcedure(ri);

http://git-wip-us.apache.org/repos/asf/hbase/blob/655592fc/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 7352826..3d66072 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.procedure;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 
@@ -28,7 +29,6 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.MasterWalManager;
-import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
@@ -114,68 +114,79 @@ implements ServerProcedureInterface {
 
     try {
       switch (state) {
-      case SERVER_CRASH_START:
-        LOG.info("Start " + this);
-        // If carrying meta, process it first. Else, get list of regions on 
crashed server.
-        if (this.carryingMeta) {
-          setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
-        } else {
+        case SERVER_CRASH_START:
+          LOG.info("Start " + this);
+          // If carrying meta, process it first. Else, get list of regions on 
crashed server.
+          if (this.carryingMeta) {
+            setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
+          } else {
+            setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
+          }
+          break;
+
+        case SERVER_CRASH_GET_REGIONS:
+          // If hbase:meta is not assigned, yield.
+          if (env.getAssignmentManager().waitMetaLoaded(this)) {
+            throw new ProcedureSuspendedException();
+          }
+
+          this.regionsOnCrashedServer = 
services.getAssignmentManager().getRegionStates()
+            .getServerRegionInfoSet(serverName);
+          // Where to go next? Depends on whether we should split logs at all 
or
+          // if we should do distributed log splitting.
+          if (!this.shouldSplitWal) {
+            setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
+          } else {
+            setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
+          }
+          break;
+
+        case SERVER_CRASH_PROCESS_META:
+          processMeta(env);
           setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
-        }
-        break;
-
-      case SERVER_CRASH_GET_REGIONS:
-        // If hbase:meta is not assigned, yield.
-        if (env.getAssignmentManager().waitMetaLoaded(this)) {
-          throw new ProcedureSuspendedException();
-        }
-
-        this.regionsOnCrashedServer = 
services.getAssignmentManager().getRegionStates()
-          .getServerRegionInfoSet(serverName);
-        // Where to go next? Depends on whether we should split logs at all or
-        // if we should do distributed log splitting.
-        if (!this.shouldSplitWal) {
+          break;
+
+        case SERVER_CRASH_SPLIT_LOGS:
+          splitLogs(env);
           setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
-        } else {
-          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
-        }
-        break;
-
-      case SERVER_CRASH_PROCESS_META:
-        processMeta(env);
-        setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
-        break;
-
-      case SERVER_CRASH_SPLIT_LOGS:
-        splitLogs(env);
-        setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
-        break;
-
-      case SERVER_CRASH_ASSIGN:
-        // If no regions to assign, skip assign and skip to the finish.
-        // Filter out meta regions. Those are handled elsewhere in this 
procedure.
-        // Filter changes this.regionsOnCrashedServer.
-        if (filterDefaultMetaRegions(regionsOnCrashedServer)) {
-          if (LOG.isTraceEnabled()) {
-            LOG.trace("Assigning regions " +
-              RegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + 
this +
-              "; cycles=" + getCycles());
+          break;
+
+        case SERVER_CRASH_ASSIGN:
+          // If no regions to assign, skip assign and skip to the finish.
+          // Filter out meta regions. Those are handled elsewhere in this 
procedure.
+          // Filter changes this.regionsOnCrashedServer.
+          if (filterDefaultMetaRegions(regionsOnCrashedServer)) {
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("Assigning regions " +
+                RegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + 
this +
+                "; cycles=" + getCycles());
+            }
+            // Handle RIT against crashed server. Will cancel any ongoing 
assigns/unassigns.
+            // Returns list of regions we need to reassign.
+            List<RegionInfo> toAssign = handleRIT(env, regionsOnCrashedServer);
+            AssignmentManager am = env.getAssignmentManager();
+            // CreateAssignProcedure will try to use the old location for the 
region deploy.
+            addChildProcedure(am.createAssignProcedures(toAssign));
+            setNextState(ServerCrashState.SERVER_CRASH_HANDLE_RIT2);
+          } else {
+            setNextState(ServerCrashState.SERVER_CRASH_FINISH);
           }
+          break;
+
+        case SERVER_CRASH_HANDLE_RIT2:
+          // Run the handleRIT again for case where another procedure managed 
to grab the lock on
+          // a region ahead of this crash handling procedure. Can happen in 
rare case. See
           handleRIT(env, regionsOnCrashedServer);
-          AssignmentManager am = env.getAssignmentManager();
-          // createAssignProcedure will try to use the old location for the 
region deploy.
-          addChildProcedure(am.createAssignProcedures(regionsOnCrashedServer));
-        }
-        setNextState(ServerCrashState.SERVER_CRASH_FINISH);
-        break;
-
-      case SERVER_CRASH_FINISH:
-        
services.getAssignmentManager().getRegionStates().removeServer(serverName);
-        services.getServerManager().getDeadServers().finish(serverName);
-        return Flow.NO_MORE_STATE;
-
-      default:
-        throw new UnsupportedOperationException("unhandled state=" + state);
+          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
+          break;
+
+        case SERVER_CRASH_FINISH:
+          
services.getAssignmentManager().getRegionStates().removeServer(serverName);
+          services.getServerManager().getDeadServers().finish(serverName);
+          return Flow.NO_MORE_STATE;
+
+        default:
+          throw new UnsupportedOperationException("unhandled state=" + state);
       }
     } catch (IOException e) {
       LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + 
getCycles(), e);
@@ -360,18 +371,24 @@ implements ServerProcedureInterface {
    * otherwise we have two assigns going on and they will fight over who has 
lock.
    * Notify Unassigns. If unable to unassign because server went away, 
unassigns block waiting
    * on the below callback from a ServerCrashProcedure before proceeding.
-   * @param env
-   * @param regions Regions that were on crashed server
+   * @param regions Regions on the Crashed Server.
+   * @return List of regions we should assign to new homes (not same as 
regions on crashed server).
    */
-  private void handleRIT(final MasterProcedureEnv env, final List<RegionInfo> 
regions) {
-    if (regions == null) return;
+  private List<RegionInfo> handleRIT(final MasterProcedureEnv env, 
List<RegionInfo> regions) {
+    if (regions == null || regions.isEmpty()) {
+      return Collections.emptyList();
+    }
     AssignmentManager am = env.getMasterServices().getAssignmentManager();
-    final Iterator<RegionInfo> it = regions.iterator();
+    List<RegionInfo> toAssign = new ArrayList<RegionInfo>(regions);
+    // Get an iterator so can remove items.
+    final Iterator<RegionInfo> it = toAssign.iterator();
     ServerCrashException sce = null;
     while (it.hasNext()) {
       final RegionInfo hri = it.next();
       RegionTransitionProcedure rtp = 
am.getRegionStates().getRegionTransitionProcedure(hri);
-      if (rtp == null) continue;
+      if (rtp == null) {
+        continue;
+      }
       // Make sure the RIT is against this crashed server. In the case where 
there are many
       // processings of a crashed server -- backed up for whatever reason 
(slow WAL split) --
       // then a previous SCP may have already failed an assign, etc., and it 
may have a new
@@ -389,11 +406,14 @@ implements ServerProcedureInterface {
         sce = new ServerCrashException(getProcId(), getServerName());
       }
       rtp.remoteCallFailed(env, this.serverName, sce);
-      if (rtp instanceof AssignProcedure) {
-        // If an assign, include it in our return and remove from passed-in 
list of regions.
-        it.remove();
-      }
+      // If an assign, remove from passed-in list of regions so we 
subsequently do not create
+      // a new assign; the exisitng assign after the call to remoteCallFailed 
will recalibrate
+      // and assign to a server other than the crashed one; no need to create 
new assign.
+      // If an unassign, do not return this region; the above cancel will wake 
up the unassign and
+      // it will complete. Done.
+      it.remove();
     }
+    return toAssign;
   }
 
   @Override

hbase git commit: HBASE-20173 [AMv2] DisableTableProcedure concurrent to ServerCrashProcedure can deadlock

Reply via email to