Repository: hbase
Updated Branches:
  refs/heads/branch-2 4d971d0f4 -> 6befdc43b


HBASE-20700 Move meta region when server crash can cause the procedure to be stuck


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6befdc43
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6befdc43
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6befdc43

Branch: refs/heads/branch-2
Commit: 6befdc43baff4b3773dd0335d5531f651035791a
Parents: 4d971d0
Author: zhangduo <zhang...@apache.org>
Authored: Mon Jun 11 14:57:31 2018 +0800
Committer: zhangduo <zhang...@apache.org>
Committed: Mon Jun 11 15:28:21 2018 +0800

----------------------------------------------------------------------
 .../hbase/procedure2/LockedResourceType.java    |   2 +-
 .../org/apache/hadoop/hbase/master/HMaster.java |  23 ++--
 .../hbase/master/MasterMetaBootstrap.java       |  15 +--
 .../master/assignment/AssignmentManager.java    |  55 +++++----
 .../hbase/master/assignment/RegionStates.java   |  68 +++++++----
 .../assignment/RegionTransitionProcedure.java   |   2 +-
 .../master/assignment/UnassignProcedure.java    |  27 ++++-
 .../procedure/MasterProcedureScheduler.java     | 112 ++++++++++++++-----
 .../procedure/MetaProcedureInterface.java       |  32 ++++++
 .../hbase/master/procedure/MetaQueue.java       |  36 ++++++
 .../procedure/PeerProcedureInterface.java       |   2 -
 .../master/procedure/RecoverMetaProcedure.java  |  26 ++---
 .../hbase/master/procedure/SchemaLocking.java   |  17 ++-
 .../master/procedure/ServerCrashProcedure.java  |  16 +--
 .../procedure/ServerProcedureInterface.java     |   2 -
 .../procedure/TableProcedureInterface.java      |   3 -
 .../hbase/master/DummyRegionProcedure.java      |  82 ++++++++++++++
 .../hbase/master/DummyRegionProcedureState.java |  22 ++++
 .../hbase/master/TestMasterNoCluster.java       |   5 +-
 ...stServerCrashProcedureCarryingMetaStuck.java |  95 ++++++++++++++++
 .../master/TestServerCrashProcedureStuck.java   |  66 +----------
 .../MasterProcedureTestingUtility.java          |   5 +-
 22 files changed, 515 insertions(+), 198 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
----------------------------------------------------------------------
diff --git 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
index dc9b5d4..55d195b 100644
--- 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
+++ 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
@@ -22,5 +22,5 @@ import org.apache.yetus.audience.InterfaceAudience;
 
 @InterfaceAudience.Private
 public enum LockedResourceType {
-  SERVER, NAMESPACE, TABLE, REGION, PEER
+  SERVER, NAMESPACE, TABLE, REGION, PEER, META
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 7349aa6..fcb9989 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -910,7 +910,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
 
     // Bring up hbase:meta. recoverMeta is a blocking call waiting until 
hbase:meta is deployed.
     // It also starts the TableStateManager.
-    MasterMetaBootstrap metaBootstrap = createMetaBootstrap(this, status);
+    MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
     metaBootstrap.recoverMeta();
 
     //Initialize after meta as it scans meta
@@ -1055,12 +1055,18 @@ public class HMaster extends HRegionServer implements 
MasterServices {
   }
 
   /**
+   * <p>
    * Create a {@link MasterMetaBootstrap} instance.
+   * </p>
+   * <p>
+   * Will be overridden in tests.
+   * </p>
    */
-  MasterMetaBootstrap createMetaBootstrap(final HMaster master, final 
MonitoredTask status) {
+  @VisibleForTesting
+  protected MasterMetaBootstrap createMetaBootstrap() {
     // We put this out here in a method so can do a Mockito.spy and stub it out
     // w/ a mocked up MasterMetaBootstrap.
-    return new MasterMetaBootstrap(master, status);
+    return new MasterMetaBootstrap(this);
   }
 
   /**
@@ -3161,7 +3167,8 @@ public class HMaster extends HRegionServer implements 
MasterServices {
       cpHost.preGetLocks();
     }
 
-    MasterProcedureScheduler procedureScheduler = 
procedureExecutor.getEnvironment().getProcedureScheduler();
+    MasterProcedureScheduler procedureScheduler =
+      procedureExecutor.getEnvironment().getProcedureScheduler();
 
     final List<LockedResource> lockedResources = procedureScheduler.getLocks();
 
@@ -3606,11 +3613,13 @@ public class HMaster extends HRegionServer implements 
MasterServices {
 
   @Override
   public boolean recoverMeta() throws IOException {
-    ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
+    // We need to block here, so the version passed in must be greater than the current version
+    // to make sure that we will actually get a blocking latch.
+    ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(Integer.MAX_VALUE, 0);
     procedureExecutor.submitProcedure(new RecoverMetaProcedure(null, true, latch));
     latch.await();
-    LOG.info("hbase:meta deployed at=" +
-        getMetaTableLocator().getMetaRegionLocation(getZooKeeper()));
+    LOG.info("hbase:meta deployed at={}",
+      getMetaTableLocator().getMetaRegionLocation(getZooKeeper()));
     return assignmentManager.isMetaInitialized();
   }
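
The latch idiom above is worth spelling out: per the new comment, the version handed to createLatch must exceed the current one so that a blocking latch is returned. Below is a condensed restatement of the method body, using only the calls visible in this hunk; the wrapper method name is made up for illustration.

// Condensed restatement of recoverMeta() above; "recoverMetaSketch" is a made-up name.
private boolean recoverMetaSketch(ProcedureExecutor<MasterProcedureEnv> procedureExecutor,
    AssignmentManager assignmentManager) throws IOException {
  // Integer.MAX_VALUE is greater than any real version, so createLatch returns a blocking latch.
  ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(Integer.MAX_VALUE, 0);
  procedureExecutor.submitProcedure(new RecoverMetaProcedure(null, true, latch));
  latch.await(); // released when RecoverMetaProcedure completes (see the releaseLatch call below)
  return assignmentManager.isMetaInitialized();
}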
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index 59f1233..dd46e41 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -21,14 +21,12 @@ package org.apache.hadoop.hbase.master;
 import java.io.IOException;
 import java.util.List;
 import java.util.Set;
-
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
@@ -44,12 +42,10 @@ import org.slf4j.LoggerFactory;
 public class MasterMetaBootstrap {
   private static final Logger LOG = 
LoggerFactory.getLogger(MasterMetaBootstrap.class);
 
-  private final MonitoredTask status;
   private final HMaster master;
 
-  public MasterMetaBootstrap(final HMaster master, final MonitoredTask status) 
{
+  public MasterMetaBootstrap(HMaster master) {
     this.master = master;
-    this.status = status;
   }
 
   public void recoverMeta() throws InterruptedException, IOException {
@@ -58,7 +54,7 @@ public class MasterMetaBootstrap {
     // Now we can start the TableStateManager. It is backed by hbase:meta.
     master.getTableStateManager().start();
     // Enable server crash procedure handling
-    enableCrashedServerProcessing(false);
+    enableCrashedServerProcessing();
   }
 
   public void processDeadServers() {
@@ -142,8 +138,7 @@ public class MasterMetaBootstrap {
     }
   }
 
-  private void enableCrashedServerProcessing(final boolean waitForMeta)
-      throws InterruptedException {
+  private void enableCrashedServerProcessing() throws InterruptedException {
     // If crashed server processing is disabled, we enable it and expire those 
dead but not expired
     // servers. This is required so that if meta is assigning to a server 
which dies after
     // assignMeta starts assignment, ServerCrashProcedure can re-assign it. 
Otherwise, we will be
@@ -152,9 +147,5 @@ public class MasterMetaBootstrap {
       master.setServerCrashProcessingEnabled(true);
       master.getServerManager().processQueuedDeadServers();
     }
-
-    if (waitForMeta) {
-      
master.getMetaTableLocator().waitMetaRegionLocation(master.getZooKeeper());
-    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 1f20e88..3412c82 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -35,7 +35,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.stream.Collectors;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HConstants;
@@ -79,16 +78,9 @@ import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureInMemoryChore;
 import org.apache.hadoop.hbase.procedure2.util.StringUtils;
 import org.apache.hadoop.hbase.regionserver.SequenceId;
-import org.apache.hadoop.hbase.util.HasThread;
-import 
org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
-import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
-import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.VersionInfo;
@@ -96,6 +88,15 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import 
org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
+import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
+import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
+
 /**
  * The AssignmentManager is the coordinator for region assign/unassign 
operations.
  * <ul>
@@ -966,7 +967,7 @@ public class AssignmentManager implements ServerListener {
     final ServerStateNode serverNode = 
regionStates.getOrCreateServer(serverName);
 
     synchronized (serverNode) {
-      if (serverNode.isInState(ServerState.SPLITTING, ServerState.OFFLINE)) {
+      if (!serverNode.isInState(ServerState.ONLINE)) {
         LOG.warn("Got a report from a server result in state " + 
serverNode.getState());
         return;
       }
@@ -1918,22 +1919,38 @@ public class AssignmentManager implements 
ServerListener {
   }
 
   /**
+   * <p>
    * This is a very particular check. The {@link org.apache.hadoop.hbase.master.ServerManager} is
-   * where you go to check on state of 'Servers', what Servers are online, etc. Here we are
-   * checking the state of a server that is post expiration, a ServerManager function that moves a
-   * server from online to dead. Here we are seeing if the server has moved beyond a particular
-   * point in the recovery process such that it is safe to move on with assigns; etc.
-   * @return True if this Server does not exist or if does and it is marked as OFFLINE (which
-   *   happens after all WALs have been split on this server making it so assigns, etc. can
-   *   proceed). If null, presumes the ServerStateNode was cleaned up by SCP.
+   * where you go to check on state of 'Servers', what Servers are online, etc.
+   * </p>
+   * <p>
+   * Here we are checking the state of a server that is post expiration, a ServerManager function
+   * that moves a server from online to dead. Here we are seeing if the server has moved beyond a
+   * particular point in the recovery process such that it is safe to move on with assigns; etc.
+   * </p>
+   * <p>
+   * For now it is only used in
+   * {@link UnassignProcedure#remoteCallFailed(MasterProcedureEnv, RegionStateNode, IOException)} to
+   * see whether we can safely quit without losing data.
+   * </p>
+   * @param meta whether to check for meta log splitting
+   * @return {@code true} if the server does not exist or the log splitting is done, i.e., the
+   *         server is in OFFLINE state (or, for the meta log, in SPLITTING_META_DONE state). If
+   *         the ServerStateNode is null, presumes it was already cleaned up by SCP.
+   * @see UnassignProcedure#remoteCallFailed(MasterProcedureEnv, RegionStateNode, IOException)
    */
-  boolean isDeadServerProcessed(final ServerName serverName) {
+  boolean isLogSplittingDone(ServerName serverName, boolean meta) {
     ServerStateNode ssn = this.regionStates.getServerNode(serverName);
     if (ssn == null) {
       return true;
     }
+    ServerState[] inState =
+      meta
+        ? new ServerState[] { ServerState.SPLITTING_META_DONE, 
ServerState.SPLITTING,
+          ServerState.OFFLINE }
+        : new ServerState[] { ServerState.OFFLINE };
     synchronized (ssn) {
-      return ssn.isOffline();
+      return ssn.isInState(inState);
     }
   }
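
In other words, which server states count as "splitting done" depends on whether the caller is dealing with the meta region. A compact restatement of the check above, reusing the same locals (ssn, meta) and the varargs isInState already shown in RegionStates:

// Illustrative restatement only. For meta, anything at or past SPLITTING_META_DONE counts as
// "meta log splitting is done"; for ordinary regions only OFFLINE (all WALs split) is enough.
boolean done;
synchronized (ssn) {
  done = meta
    ? ssn.isInState(ServerState.SPLITTING_META_DONE, ServerState.SPLITTING, ServerState.OFFLINE)
    : ssn.isInState(ServerState.OFFLINE);
}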
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 5f0578e..26b340f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -318,12 +318,26 @@ public class RegionStates {
     ONLINE,
 
     /**
+     * Only the server which carries meta can have this state. We will split the WAL for meta and
+     * then assign meta first, before splitting the other WALs.
+     */
+    SPLITTING_META,
+
+    /**
+     * Indicates that the meta WAL splitting is done. We need this state so that the
+     * UnassignProcedure for meta can safely quit. See the comments in
+     * UnassignProcedure.remoteCallFailed for more details.
+     */
+    SPLITTING_META_DONE,
+
+    /**
      * Server expired/crashed. Currently undergoing WAL splitting.
      */
     SPLITTING,
 
     /**
-     * WAL splitting done.
+     * WAL splitting done. This state will be used to tell the UnassignProcedure that it can
+     * safely quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
      */
     OFFLINE
   }
@@ -357,10 +371,6 @@ public class RegionStates {
       return reportEvent;
     }
 
-    public boolean isOffline() {
-      return this.state.equals(ServerState.OFFLINE);
-    }
-
     public boolean isInState(final ServerState... expected) {
       boolean expectedState = false;
       if (expected != null) {
@@ -371,7 +381,7 @@ public class RegionStates {
       return expectedState;
     }
 
-    public void setState(final ServerState state) {
+    private void setState(final ServerState state) {
       this.state = state;
     }
 
@@ -612,18 +622,40 @@ public class RegionStates {
   }
 
   // 
============================================================================================
-  //  TODO: split helpers
+  // Split helpers
+  // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will
+  // remove the ServerStateNode by calling removeServer.
   // 
============================================================================================
 
+  private void setServerState(ServerName serverName, ServerState state) {
+    ServerStateNode serverNode = getOrCreateServer(serverName);
+    synchronized (serverNode) {
+      serverNode.setState(state);
+    }
+  }
+
   /**
-   * Call this when we start log splitting a crashed Server.
+   * Call this when we start meta log splitting a crashed Server.
+   * @see #metaLogSplit(ServerName)
+   */
+  public void metaLogSplitting(ServerName serverName) {
+    setServerState(serverName, ServerState.SPLITTING_META);
+  }
+
+  /**
+   * Called after we've split the meta logs on a crashed Server.
+   * @see #metaLogSplitting(ServerName)
+   */
+  public void metaLogSplit(ServerName serverName) {
+    setServerState(serverName, ServerState.SPLITTING_META_DONE);
+  }
+
+  /**
+   * Call this when we start log splitting for a crashed Server.
    * @see #logSplit(ServerName)
    */
   public void logSplitting(final ServerName serverName) {
-    final ServerStateNode serverNode = getOrCreateServer(serverName);
-    synchronized (serverNode) {
-      serverNode.setState(ServerState.SPLITTING);
-    }
+    setServerState(serverName, ServerState.SPLITTING);
   }
 
   /**
@@ -631,17 +663,7 @@ public class RegionStates {
    * @see #logSplitting(ServerName)
    */
   public void logSplit(final ServerName serverName) {
-    final ServerStateNode serverNode = getOrCreateServer(serverName);
-    synchronized (serverNode) {
-      serverNode.setState(ServerState.OFFLINE);
-    }
-  }
-
-  public void logSplit(final RegionInfo regionInfo) {
-    final RegionStateNode regionNode = getRegionStateNode(regionInfo);
-    synchronized (regionNode) {
-      regionNode.setState(State.SPLIT);
-    }
+    setServerState(serverName, ServerState.OFFLINE);
   }
 
   @VisibleForTesting
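
Taken together with the RecoverMetaProcedure change further down, these helpers give a crashed meta-carrying server the state progression sketched below. The helper method itself is hypothetical (not part of this commit), and it assumes the MasterWalManager splitMetaLog/splitLog calls behave as used elsewhere in the patch.

// Hypothetical helper, for illustration only: how SCP/RecoverMetaProcedure are expected to drive
// a crashed meta-carrying server through the new server states.
void splitWalsForCrashedMetaServer(RegionStates regionStates, MasterWalManager walManager,
    ServerName crashed) throws IOException {
  regionStates.metaLogSplitting(crashed);   // ONLINE -> SPLITTING_META
  walManager.splitMetaLog(crashed);         // blocking split of the meta WAL
  regionStates.metaLogSplit(crashed);       // -> SPLITTING_META_DONE, meta can be re-assigned now
  regionStates.logSplitting(crashed);       // -> SPLITTING, now handle the remaining WALs
  walManager.splitLog(crashed);             // blocking split of the user-region WALs
  regionStates.logSplit(crashed);           // -> OFFLINE, user regions are safe to re-assign
}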

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
index 946bd3b..b96fb20 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
@@ -213,7 +213,7 @@ public abstract class RegionTransitionProcedure
       RegionStateNode regionNode, IOException exception);
 
   @Override
-  public void remoteCallFailed(final MasterProcedureEnv env,
+  public synchronized void remoteCallFailed(final MasterProcedureEnv env,
       final ServerName serverName, final IOException exception) {
     final RegionStateNode regionNode = getRegionState(env);
     LOG.warn("Remote call failed {}; {}; {}; exception={}", serverName,

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
index e2efdec..4f58a0f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
@@ -310,12 +310,29 @@ public class UnassignProcedure extends 
RegionTransitionProcedure {
           exception.getClass().getSimpleName());
       if 
(!env.getMasterServices().getServerManager().expireServer(serverName)) {
         // Failed to queue an expire. Lots of possible reasons including it 
may be already expired.
-        // If so, is it beyond the state where we will be woken-up if go ahead 
and suspend the
-        // procedure. Look for this rare condition.
-        if (env.getAssignmentManager().isDeadServerProcessed(serverName)) {
+        // In ServerCrashProcedure and RecoverMetaProcedure, there is a handleRIT stage where we
+        // will iterate over all the RIT procedures for the related regions of a crashed RS and
+        // fail them with ServerCrashException. You can see the isSafeToProceed method above for
+        // more details.
+        // This works for most cases, but since we do not hold the region lock in handleRIT,
+        // there could be a race where we arrive here after the handleRIT stage of the SCP. So
+        // here we need to check whether it is safe to quit.
+        // Notice that the first assumption is that we can only quit after the log splitting is
+        // done, as MRP can schedule an AssignProcedure right after us, and if the log splitting
+        // has not been done yet then there will be data loss. And in SCP, we change the state to
+        // OFFLINE (or SPLITTING_META_DONE for meta log processing) after finishing the log
+        // splitting, and only then call handleRIT, so checking the state here is a safe fence.
+        // If the state is not OFFLINE (or SPLITTING_META_DONE), we can just leave this procedure
+        // in the suspended state, as we know the handleRIT stage has not been executed yet and it
+        // will wake us up later. And if the state is OFFLINE (or SPLITTING_META_DONE), we can
+        // safely quit since there will be no data loss. There could be duplicated
+        // AssignProcedures for the same region, but that is OK as AssignProcedure does a check at
+        // the beginning to prevent a double assign, and it holds the region lock so there is no
+        // race there.
+        if (env.getAssignmentManager().isLogSplittingDone(serverName, isMeta())) {
           // Its ok to proceed with this unassign.
-          LOG.info("{} is dead and processed; moving procedure to finished 
state; {}",
-              serverName, this);
+          LOG.info("{} is dead and processed; moving procedure to finished 
state; {}", serverName,
+            this);
           proceed(env, regionNode);
           // Return true; wake up the procedure so we can act on proceed.
           return true;
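
Condensed, the expireServer-failed branch above reduces to the decision below; the sketch uses the same calls as the patch and is shown only to make the control flow explicit.

// Only finish this UnassignProcedure once WAL splitting for the dead server is known to be done;
// otherwise stay suspended and let SCP's handleRIT stage wake us up.
if (!env.getMasterServices().getServerManager().expireServer(serverName)) {
  if (env.getAssignmentManager().isLogSplittingDone(serverName, isMeta())) {
    proceed(env, regionNode); // safe: no data loss; AssignProcedure guards against double assign
    return true;              // wake the procedure so it can act on proceed()
  }
  // not safe yet: handleRIT has not run for this region and will wake us up later
}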

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
index d78efc6..9a497a7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
@@ -101,14 +101,18 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
     (n, k) -> n.compareKey((TableName) k);
   private final static AvlKeyComparator<PeerQueue> PEER_QUEUE_KEY_COMPARATOR =
     (n, k) -> n.compareKey((String) k);
+  private final static AvlKeyComparator<MetaQueue> META_QUEUE_KEY_COMPARATOR =
+    (n, k) -> n.compareKey((TableName) k);
 
   private final FairQueue<ServerName> serverRunQueue = new FairQueue<>();
   private final FairQueue<TableName> tableRunQueue = new FairQueue<>();
   private final FairQueue<String> peerRunQueue = new FairQueue<>();
+  private final FairQueue<TableName> metaRunQueue = new FairQueue<>();
 
   private final ServerQueue[] serverBuckets = new ServerQueue[128];
   private TableQueue tableMap = null;
   private PeerQueue peerMap = null;
+  private MetaQueue metaMap = null;
 
   private final SchemaLocking locking = new SchemaLocking();
 
@@ -119,7 +123,9 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
 
   @Override
   protected void enqueue(final Procedure proc, final boolean addFront) {
-    if (isTableProcedure(proc)) {
+    if (isMetaProcedure(proc)) {
+      doAdd(metaRunQueue, getMetaQueue(), proc, addFront);
+    } else if (isTableProcedure(proc)) {
       doAdd(tableRunQueue, getTableQueue(getTableName(proc)), proc, addFront);
     } else if (isServerProcedure(proc)) {
       doAdd(serverRunQueue, getServerQueue(getServerName(proc)), proc, 
addFront);
@@ -153,16 +159,20 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
 
   @Override
   protected boolean queueHasRunnables() {
-    return tableRunQueue.hasRunnables() || serverRunQueue.hasRunnables() ||
-        peerRunQueue.hasRunnables();
+    return metaRunQueue.hasRunnables() || tableRunQueue.hasRunnables() ||
+      serverRunQueue.hasRunnables() || peerRunQueue.hasRunnables();
   }
 
   @Override
   protected Procedure dequeue() {
+    // meta procedure is always the first priority
+    Procedure<?> pollResult = doPoll(metaRunQueue);
     // For now, let server handling have precedence over table handling; 
presumption is that it
     // is more important handling crashed servers than it is running the
     // enabling/disabling tables, etc.
-    Procedure<?> pollResult = doPoll(serverRunQueue);
+    if (pollResult == null) {
+      pollResult = doPoll(serverRunQueue);
+    }
     if (pollResult == null) {
       pollResult = doPoll(peerRunQueue);
     }
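
The new dispatch order is easiest to read as a chain of polls. A sketch of what dequeue() now does; the final table poll is not visible in this hunk and is assumed unchanged from the pre-existing code.

// Poll order after this change: meta recovery first, then crashed-server handling,
// then replication peers, then ordinary table procedures.
Procedure<?> pollResult = doPoll(metaRunQueue);
if (pollResult == null) {
  pollResult = doPoll(serverRunQueue);
}
if (pollResult == null) {
  pollResult = doPoll(peerRunQueue);
}
if (pollResult == null) {
  pollResult = doPoll(tableRunQueue); // assumed: the pre-existing fallback
}
return pollResult;
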
@@ -263,31 +273,24 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
     }
   }
 
-  @Override
-  protected int queueSize() {
+  private int queueSize(Queue<?> head) {
     int count = 0;
-
-    // Server queues
-    final AvlTreeIterator<ServerQueue> serverIter = new AvlTreeIterator<>();
-    for (int i = 0; i < serverBuckets.length; ++i) {
-      serverIter.seekFirst(serverBuckets[i]);
-      while (serverIter.hasNext()) {
-        count += serverIter.next().size();
-      }
-    }
-
-    // Table queues
-    final AvlTreeIterator<TableQueue> tableIter = new 
AvlTreeIterator<>(tableMap);
-    while (tableIter.hasNext()) {
-      count += tableIter.next().size();
+    AvlTreeIterator<Queue<?>> iter = new AvlTreeIterator<Queue<?>>(head);
+    while (iter.hasNext()) {
+      count += iter.next().size();
     }
+    return count;
+  }
 
-    // Peer queues
-    final AvlTreeIterator<PeerQueue> peerIter = new AvlTreeIterator<>(peerMap);
-    while (peerIter.hasNext()) {
-      count += peerIter.next().size();
+  @Override
+  protected int queueSize() {
+    int count = 0;
+    for (ServerQueue serverMap : serverBuckets) {
+      count += queueSize(serverMap);
     }
-
+    count += queueSize(tableMap);
+    count += queueSize(peerMap);
+    count += queueSize(metaMap);
     return count;
   }
 
@@ -431,6 +434,22 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
   }
 
   // 
============================================================================
+  //  Meta Queue Lookup Helpers
+  // 
============================================================================
+  private MetaQueue getMetaQueue() {
+    MetaQueue node = AvlTree.get(metaMap, TableName.META_TABLE_NAME, 
META_QUEUE_KEY_COMPARATOR);
+    if (node != null) {
+      return node;
+    }
+    node = new MetaQueue(locking.getMetaLock());
+    metaMap = AvlTree.insert(metaMap, node);
+    return node;
+  }
+
+  private static boolean isMetaProcedure(Procedure<?> proc) {
+    return proc instanceof MetaProcedureInterface;
+  }
+  // 
============================================================================
   //  Table Locking Helpers
   // 
============================================================================
   /**
@@ -870,6 +889,49 @@ public class MasterProcedureScheduler extends 
AbstractProcedureScheduler {
     }
   }
 
+  // 
============================================================================
+  // Meta Locking Helpers
+  // 
============================================================================
+  /**
+   * Try to acquire the exclusive lock on meta.
+   * @see #wakeMetaExclusiveLock(Procedure)
+   * @param procedure the procedure trying to acquire the lock
+   * @return true if the procedure has to wait for meta to be available
+   */
+  public boolean waitMetaExclusiveLock(Procedure<?> procedure) {
+    schedLock();
+    try {
+      final LockAndQueue lock = locking.getMetaLock();
+      if (lock.tryExclusiveLock(procedure)) {
+        removeFromRunQueue(metaRunQueue, getMetaQueue());
+        return false;
+      }
+      waitProcedure(lock, procedure);
+      logLockedResource(LockedResourceType.META, 
TableName.META_TABLE_NAME.getNameAsString());
+      return true;
+    } finally {
+      schedUnlock();
+    }
+  }
+
+  /**
+   * Wake the procedures waiting for meta.
+   * @see #waitMetaExclusiveLock(Procedure)
+   * @param procedure the procedure releasing the lock
+   */
+  public void wakeMetaExclusiveLock(Procedure<?> procedure) {
+    schedLock();
+    try {
+      final LockAndQueue lock = locking.getMetaLock();
+      lock.releaseExclusiveLock(procedure);
+      addToRunQueue(metaRunQueue, getMetaQueue());
+      int waitingCount = wakeWaitingProcedures(lock);
+      wakePollIfNeeded(waitingCount);
+    } finally {
+      schedUnlock();
+    }
+  }
+
   /**
    * For debugging. Expensive.
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaProcedureInterface.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaProcedureInterface.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaProcedureInterface.java
new file mode 100644
index 0000000..39c271b
--- /dev/null
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaProcedureInterface.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+public interface MetaProcedureInterface {
+
+  enum MetaOperationType {
+    RECOVER
+  }
+
+  default MetaOperationType getMetaOperationType() {
+    return MetaOperationType.RECOVER;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java
new file mode 100644
index 0000000..190c956
--- /dev/null
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.procedure2.LockStatus;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+class MetaQueue extends Queue<TableName> {
+
+  protected MetaQueue(LockStatus lockStatus) {
+    super(TableName.META_TABLE_NAME, 1, lockStatus);
+  }
+
+  @Override
+  boolean requireExclusiveLock(Procedure<?> proc) {
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerProcedureInterface.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerProcedureInterface.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerProcedureInterface.java
index 4abc9ad..399bcd7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerProcedureInterface.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerProcedureInterface.java
@@ -18,10 +18,8 @@
 package org.apache.hadoop.hbase.master.procedure;
 
 import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.yetus.audience.InterfaceStability;
 
 @InterfaceAudience.Private
-@InterfaceStability.Evolving
 public interface PeerProcedureInterface {
 
   enum PeerOperationType {

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
index 7572495..97035f1 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -15,13 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.hadoop.hbase.master.procedure;
 
 import java.io.IOException;
 import java.util.Set;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
@@ -54,7 +52,7 @@ import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
 @InterfaceAudience.Private
 public class RecoverMetaProcedure
     extends StateMachineProcedure<MasterProcedureEnv, 
MasterProcedureProtos.RecoverMetaState>
-    implements TableProcedureInterface {
+    implements MetaProcedureInterface {
   private static final Logger LOG = 
LoggerFactory.getLogger(RecoverMetaProcedure.class);
 
   private ServerName failedMetaServer;
@@ -125,21 +123,25 @@ public class RecoverMetaProcedure
           LOG.info("Start " + this);
           if (shouldSplitWal) {
             // TODO: Matteo. We BLOCK here but most important thing to be 
doing at this moment.
+            AssignmentManager am = 
env.getMasterServices().getAssignmentManager();
             if (failedMetaServer != null) {
+              am.getRegionStates().metaLogSplitting(failedMetaServer);
               master.getMasterWalManager().splitMetaLog(failedMetaServer);
+              am.getRegionStates().metaLogSplit(failedMetaServer);
             } else {
               ServerName serverName =
                   
master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper());
               Set<ServerName> previouslyFailedServers =
                   
master.getMasterWalManager().getFailedServersFromLogFolders();
               if (serverName != null && 
previouslyFailedServers.contains(serverName)) {
+                am.getRegionStates().metaLogSplitting(serverName);
                 master.getMasterWalManager().splitMetaLog(serverName);
+                am.getRegionStates().metaLogSplit(serverName);
               }
             }
           }
           setNextState(RecoverMetaState.RECOVER_META_ASSIGN_REGIONS);
           break;
-
         case RECOVER_META_ASSIGN_REGIONS:
           RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
               RegionInfoBuilder.FIRST_META_REGIONINFO, this.replicaId);
@@ -258,7 +260,7 @@ public class RecoverMetaProcedure
 
   @Override
   protected LockState acquireLock(MasterProcedureEnv env) {
-    if (env.getProcedureScheduler().waitTableExclusiveLock(this, 
TableName.META_TABLE_NAME)) {
+    if (env.getProcedureScheduler().waitMetaExclusiveLock(this)) {
       return LockState.LOCK_EVENT_WAIT;
     }
     return LockState.LOCK_ACQUIRED;
@@ -266,7 +268,7 @@ public class RecoverMetaProcedure
 
   @Override
   protected void releaseLock(MasterProcedureEnv env) {
-    env.getProcedureScheduler().wakeTableExclusiveLock(this, 
TableName.META_TABLE_NAME);
+    env.getProcedureScheduler().wakeMetaExclusiveLock(this);
   }
 
   @Override
@@ -274,16 +276,6 @@ public class RecoverMetaProcedure
     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
   }
 
-  @Override
-  public TableName getTableName() {
-    return TableName.META_TABLE_NAME;
-  }
-
-  @Override
-  public TableOperationType getTableOperationType() {
-    return TableOperationType.ENABLE;
-  }
-
   /**
    * @return true if failedMetaServer is not null (meta carrying server 
crashed) or meta is
    * already initialized

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SchemaLocking.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SchemaLocking.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SchemaLocking.java
index b859264..2e5be14 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SchemaLocking.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SchemaLocking.java
@@ -32,6 +32,8 @@ import org.apache.hadoop.hbase.procedure2.LockedResourceType;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.yetus.audience.InterfaceAudience;
 
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
+
 /**
  * <p>
  * Locks on namespaces, tables, and regions.
@@ -49,6 +51,7 @@ class SchemaLocking {
   // Single map for all regions irrespective of tables. Key is encoded region 
name.
   private final Map<String, LockAndQueue> regionLocks = new HashMap<>();
   private final Map<String, LockAndQueue> peerLocks = new HashMap<>();
+  private final LockAndQueue metaLock = new LockAndQueue();
 
   private <T> LockAndQueue getLock(Map<T, LockAndQueue> map, T key) {
     LockAndQueue lock = map.get(key);
@@ -75,6 +78,10 @@ class SchemaLocking {
     return getLock(regionLocks, encodedRegionName);
   }
 
+  LockAndQueue getMetaLock() {
+    return metaLock;
+  }
+
   LockAndQueue removeRegionLock(String encodedRegionName) {
     return regionLocks.remove(encodedRegionName);
   }
@@ -143,8 +150,9 @@ class SchemaLocking {
       LockedResourceType.TABLE);
     addToLockedResources(lockedResources, regionLocks, Function.identity(),
       LockedResourceType.REGION);
-    addToLockedResources(lockedResources, peerLocks, Function.identity(),
-      LockedResourceType.PEER);
+    addToLockedResources(lockedResources, peerLocks, Function.identity(), 
LockedResourceType.PEER);
+    addToLockedResources(lockedResources, 
ImmutableMap.of(TableName.META_TABLE_NAME, metaLock),
+      tn -> tn.getNameAsString(), LockedResourceType.META);
     return lockedResources;
   }
 
@@ -170,6 +178,8 @@ class SchemaLocking {
       case PEER:
         queue = peerLocks.get(resourceName);
         break;
+      case META:
+        queue = metaLock;
+        break;
       default:
         queue = null;
     }
@@ -193,7 +203,8 @@ class SchemaLocking {
     return "serverLocks=" + filterUnlocked(this.serverLocks) + ", 
namespaceLocks=" +
       filterUnlocked(this.namespaceLocks) + ", tableLocks=" + 
filterUnlocked(this.tableLocks) +
       ", regionLocks=" + filterUnlocked(this.regionLocks) + ", peerLocks=" +
-      filterUnlocked(this.peerLocks);
+      filterUnlocked(this.peerLocks) + ", metaLocks=" +
+      filterUnlocked(ImmutableMap.of(TableName.META_TABLE_NAME, metaLock));
   }
 
   private String filterUnlocked(Map<?, LockAndQueue> locks) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index ae709cd..5a4c10f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -55,8 +55,8 @@ import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.S
  */
 @InterfaceAudience.Private
 public class ServerCrashProcedure
-extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
-implements ServerProcedureInterface {
+    extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
+    implements ServerProcedureInterface {
   private static final Logger LOG = 
LoggerFactory.getLogger(ServerCrashProcedure.class);
 
   /**
@@ -163,11 +163,11 @@ implements ServerProcedureInterface {
                 "; cycles=" + getCycles());
             }
             // Handle RIT against crashed server. Will cancel any ongoing 
assigns/unassigns.
-            // Returns list of regions we need to reassign. NOTE: there is 
nothing to stop a
-            // dispatch happening AFTER this point. Check for the condition if 
a dispatch RPC fails
-            // inside in AssignProcedure/UnassignProcedure. AssignProcedure 
just keeps retrying.
-            // UnassignProcedure is more complicated. See where it does the 
check by calling
-            // am#isDeadServerProcessed.
+            // Returns list of regions we need to reassign.
+            // NOTE: there is nothing to stop a dispatch happening AFTER this point. Check for the
+            // condition if a dispatch RPC fails inside AssignProcedure/UnassignProcedure.
+            // AssignProcedure just keeps retrying. UnassignProcedure is more complicated. See
+            // where it does the check by calling am#isLogSplittingDone.
             List<RegionInfo> toAssign = handleRIT(env, regionsOnCrashedServer);
             AssignmentManager am = env.getAssignmentManager();
             // CreateAssignProcedure will try to use the old location for the 
region deploy.

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java
index ad6af21..8dd41a3 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java
@@ -19,14 +19,12 @@ package org.apache.hadoop.hbase.master.procedure;
 
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.yetus.audience.InterfaceStability;
 
 /**
  * Procedures that handle servers -- e.g. server crash -- must implement this 
Interface.
  * It is used by the procedure runner to figure locking and what queuing.
  */
 @InterfaceAudience.Private
-@InterfaceStability.Evolving
 public interface ServerProcedureInterface {
   public enum ServerOperationType {
     CRASH_HANDLER

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java
index 98346bb..20de914 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java
@@ -15,19 +15,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.hadoop.hbase.master.procedure;
 
 import org.apache.hadoop.hbase.TableName;
 import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.yetus.audience.InterfaceStability;
 
 /**
  * Procedures that operates on a specific Table (e.g. create, delete, 
snapshot, ...)
  * must implement this interface to allow the system handle the 
lock/concurrency problems.
  */
 @InterfaceAudience.Private
-@InterfaceStability.Evolving
 public interface TableProcedureInterface {
   public enum TableOperationType {
     CREATE, DELETE, DISABLE, EDIT, ENABLE, READ,

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedure.java
new file mode 100644
index 0000000..8d0df5f
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedure.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.concurrent.CountDownLatch;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import 
org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+
+public class DummyRegionProcedure
+    extends AbstractStateMachineRegionProcedure<DummyRegionProcedureState> {
+
+  private final CountDownLatch arrive = new CountDownLatch(1);
+
+  private final CountDownLatch resume = new CountDownLatch(1);
+
+  public DummyRegionProcedure() {
+  }
+
+  public DummyRegionProcedure(MasterProcedureEnv env, RegionInfo hri) {
+    super(env, hri);
+  }
+
+  @Override
+  public TableOperationType getTableOperationType() {
+    return TableOperationType.REGION_EDIT;
+  }
+
+  @Override
+  protected Flow executeFromState(MasterProcedureEnv env, 
DummyRegionProcedureState state)
+      throws ProcedureSuspendedException, ProcedureYieldException, 
InterruptedException {
+    arrive.countDown();
+    resume.await();
+    return Flow.NO_MORE_STATE;
+  }
+
+  @Override
+  protected void rollbackState(MasterProcedureEnv env, 
DummyRegionProcedureState state)
+      throws IOException, InterruptedException {
+  }
+
+  @Override
+  protected DummyRegionProcedureState getState(int stateId) {
+    return DummyRegionProcedureState.STATE;
+  }
+
+  @Override
+  protected int getStateId(DummyRegionProcedureState state) {
+    return 0;
+  }
+
+  @Override
+  protected DummyRegionProcedureState getInitialState() {
+    return DummyRegionProcedureState.STATE;
+  }
+
+  public void waitUntilArrive() throws InterruptedException {
+    arrive.await();
+  }
+
+  public void resume() {
+    resume.countDown();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedureState.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedureState.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedureState.java
new file mode 100644
index 0000000..bcce7e6
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/DummyRegionProcedureState.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+public enum DummyRegionProcedureState {
+  STATE
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
index 43ddd83..a52a60c 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
@@ -261,8 +260,8 @@ public class TestMasterNoCluster {
 
     HMaster master = new HMaster(conf) {
       @Override
-      MasterMetaBootstrap createMetaBootstrap(final HMaster master, final 
MonitoredTask status) {
-        return new MasterMetaBootstrap(this, status) {
+      protected MasterMetaBootstrap createMetaBootstrap() {
+        return new MasterMetaBootstrap(this) {
           @Override
           protected void assignMetaReplicas()
               throws IOException, InterruptedException, KeeperException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
new file mode 100644
index 0000000..748cd0e
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.AsyncAdmin;
+import org.apache.hadoop.hbase.client.AsyncConnection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestServerCrashProcedureCarryingMetaStuck {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestServerCrashProcedureCarryingMetaStuck.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.startMiniCluster(3);
+    UTIL.getAdmin().balancerSwitch(false, true);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void test() throws Exception {
+    RegionServerThread rsThread = null;
+    for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
+      if (!t.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty()) {
+        rsThread = t;
+        break;
+      }
+    }
+    HRegionServer rs = rsThread.getRegionServer();
+    RegionInfo hri = rs.getRegions(TableName.META_TABLE_NAME).get(0).getRegionInfo();
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    ProcedureExecutor<MasterProcedureEnv> executor = master.getMasterProcedureExecutor();
+    DummyRegionProcedure proc = new DummyRegionProcedure(executor.getEnvironment(), hri);
+    long procId = master.getMasterProcedureExecutor().submitProcedure(proc);
+    proc.waitUntilArrive();
+    try (AsyncConnection conn =
+      ConnectionFactory.createAsyncConnection(UTIL.getConfiguration()).get()) {
+      AsyncAdmin admin = conn.getAdmin();
+      CompletableFuture<Void> future = admin.move(hri.getRegionName());
+      rs.abort("For testing!");
+
+      UTIL.waitFor(30000,
+        () -> executor.getProcedures().stream().filter(p -> p instanceof AssignProcedure)
+          .map(p -> (AssignProcedure) p)
+          .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegionInfo().getRegionName())));
+      proc.resume();
+      UTIL.waitFor(30000, () -> executor.isFinished(procId));
+      // see whether the move region procedure can finish properly
+      future.get(30, TimeUnit.SECONDS);
+    }
+  }
+}
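
The new test reproduces the HBASE-20700 scenario end to end: it finds the region server carrying hbase:meta, pins the meta region inside a DummyRegionProcedure, issues a move for that region, aborts the server, and then verifies that the resulting ServerCrashProcedure and the queued AssignProcedure can still finish once the dummy procedure is released. The waitUntilArrive()/resume() calls are assumed to be thin wrappers over the CountDownLatch pair that the old inline DummyRegionProcedure (removed below) kept as static fields; a minimal sketch under that assumption, not the committed DummyRegionProcedure.java:

    // Sketch: instance latches replacing the former static ARRIVE/RESUME fields.
    private final CountDownLatch arrive = new CountDownLatch(1);
    private final CountDownLatch resumeLatch = new CountDownLatch(1);

    @Override
    protected Flow executeFromState(MasterProcedureEnv env, DummyRegionProcedureState state)
        throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
      arrive.countDown();    // signal the test that the region is now held
      resumeLatch.await();   // park here until the test calls resume()
      return Flow.NO_MORE_STATE;
    }

    public void waitUntilArrive() throws InterruptedException {
      arrive.await();
    }

    public void resume() {
      resumeLatch.countDown();
    }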

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
index a83e0d2..2681657 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
@@ -17,12 +17,8 @@
  */
 package org.apache.hadoop.hbase.master;
 
-import java.io.IOException;
 import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.TableName;
@@ -31,11 +27,8 @@ import org.apache.hadoop.hbase.client.AsyncConnection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
-import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
-import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
-import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -63,58 +56,6 @@ public class TestServerCrashProcedureStuck {
 
   private static byte[] CF = Bytes.toBytes("cf");
 
-  private static CountDownLatch ARRIVE = new CountDownLatch(1);
-
-  private static CountDownLatch RESUME = new CountDownLatch(1);
-
-  public enum DummyState {
-    STATE
-  }
-
-  public static final class DummyRegionProcedure
-      extends AbstractStateMachineRegionProcedure<DummyState> {
-
-    public DummyRegionProcedure() {
-    }
-
-    public DummyRegionProcedure(MasterProcedureEnv env, RegionInfo hri) {
-      super(env, hri);
-    }
-
-    @Override
-    public TableOperationType getTableOperationType() {
-      return TableOperationType.REGION_EDIT;
-    }
-
-    @Override
-    protected Flow executeFromState(MasterProcedureEnv env, DummyState state)
-        throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
-      ARRIVE.countDown();
-      RESUME.await();
-      return Flow.NO_MORE_STATE;
-    }
-
-    @Override
-    protected void rollbackState(MasterProcedureEnv env, DummyState state)
-        throws IOException, InterruptedException {
-    }
-
-    @Override
-    protected DummyState getState(int stateId) {
-      return DummyState.STATE;
-    }
-
-    @Override
-    protected int getStateId(DummyState state) {
-      return 0;
-    }
-
-    @Override
-    protected DummyState getInitialState() {
-      return DummyState.STATE;
-    }
-  }
-
   @BeforeClass
   public static void setUp() throws Exception {
     UTIL.startMiniCluster(3);
@@ -129,8 +70,7 @@ public class TestServerCrashProcedureStuck {
   }
 
   @Test
-  public void test()
-      throws IOException, InterruptedException, ExecutionException, TimeoutException {
+  public void test() throws Exception {
     RegionServerThread rsThread = null;
     for (RegionServerThread t : UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
       if (!t.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
@@ -144,7 +84,7 @@ public class TestServerCrashProcedureStuck {
     ProcedureExecutor<MasterProcedureEnv> executor = master.getMasterProcedureExecutor();
     DummyRegionProcedure proc = new DummyRegionProcedure(executor.getEnvironment(), hri);
     long procId = master.getMasterProcedureExecutor().submitProcedure(proc);
-    ARRIVE.await();
+    proc.waitUntilArrive();
     try (AsyncConnection conn =
       ConnectionFactory.createAsyncConnection(UTIL.getConfiguration()).get()) {
       AsyncAdmin admin = conn.getAdmin();
@@ -155,7 +95,7 @@ public class TestServerCrashProcedureStuck {
         () -> executor.getProcedures().stream().filter(p -> p instanceof AssignProcedure)
           .map(p -> (AssignProcedure) p)
           .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegionInfo().getRegionName())));
-      RESUME.countDown();
+      proc.resume();
       UTIL.waitFor(30000, () -> executor.isFinished(procId));
       // see whether the move region procedure can finish properly
       future.get(30, TimeUnit.SECONDS);
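
TestServerCrashProcedureStuck itself changes in only two ways: the inline DummyState/DummyRegionProcedure (with its static ARRIVE/RESUME latches) gives way to the standalone DummyRegionProcedure and DummyRegionProcedureState classes added by this patch, and the test now drives the procedure through its instance methods. A condensed sketch of the shared pattern, which the new meta-carrying test above follows as well:

    // Assumed shared pattern; names taken from the two tests in this patch.
    DummyRegionProcedure proc = new DummyRegionProcedure(executor.getEnvironment(), hri);
    long procId = executor.submitProcedure(proc);
    proc.waitUntilArrive();   // block until the dummy procedure holds the region
    // ... issue the move and abort the region server ...
    proc.resume();            // release the dummy procedure so recovery can proceed
    UTIL.waitFor(30000, () -> executor.isFinished(procId));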

http://git-wip-us.apache.org/repos/asf/hbase/blob/6befdc43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
index 785e85f..3f61de4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
@@ -27,7 +27,6 @@ import java.util.List;
 import java.util.TreeSet;
 import java.util.concurrent.Callable;
 import java.util.concurrent.atomic.AtomicInteger;
-
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -53,7 +52,6 @@ import org.apache.hadoop.hbase.master.MasterMetaBootstrap;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.TableStateManager;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
@@ -98,8 +96,7 @@ public class MasterProcedureTestingUtility {
         public Void call() throws Exception {
           final AssignmentManager am = env.getAssignmentManager();
           am.start();
-          MasterMetaBootstrap metaBootstrap = new MasterMetaBootstrap(master,
-              TaskMonitor.get().createStatus("meta"));
+          MasterMetaBootstrap metaBootstrap = new MasterMetaBootstrap(master);
           metaBootstrap.recoverMeta();
           metaBootstrap.processDeadServers();
           am.joinCluster();
