Repository: hbase
Updated Branches:
  refs/heads/master 4cb76aa92 -> 3a82cf238


HBASE-11760 Tighten up region state transition


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3a82cf23
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3a82cf23
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3a82cf23

Branch: refs/heads/master
Commit: 3a82cf238ba287a6dc555e31c01c2dd43e880a1f
Parents: 4cb76aa
Author: Jimmy Xiang <[email protected]>
Authored: Thu Aug 14 16:39:12 2014 -0700
Committer: Jimmy Xiang <[email protected]>
Committed: Thu Sep 11 13:21:12 2014 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/RegionState.java |  56 +-
 .../RegionAlreadyInTransitionException.java     |  39 -
 .../hadoop/hbase/master/AssignCallable.java     |   6 +-
 .../hadoop/hbase/master/AssignmentManager.java  | 958 +++++++++++--------
 .../hadoop/hbase/master/RegionStates.java       |   9 +-
 .../hadoop/hbase/master/ServerManager.java      |   6 +-
 .../master/handler/ServerShutdownHandler.java   |   2 +-
 .../hbase/regionserver/HRegionServer.java       |  14 +-
 .../hbase/regionserver/RSRpcServices.java       |  55 +-
 .../handler/CloseRegionHandler.java             |   2 +-
 .../master/TestAssignmentManagerOnCluster.java  |  26 +-
 .../regionserver/TestRegionServerNoMaster.java  |  27 +-
 12 files changed, 630 insertions(+), 570 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
----------------------------------------------------------------------
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
index 0a9c123..e7af336 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
@@ -36,10 +36,10 @@ public class RegionState {
   @InterfaceStability.Evolving
   public enum State {
     OFFLINE,        // region is in an offline state
-    PENDING_OPEN,   // sent rpc to server to open but has not begun
+    PENDING_OPEN,   // same as OPENING, to be removed
     OPENING,        // server has begun to open but not yet done
     OPEN,           // server opened region and updated meta
-    PENDING_CLOSE,  // sent rpc to server to close but has not begun
+    PENDING_CLOSE,  // same as CLOSING, to be removed
     CLOSING,        // server has begun to close but not yet done
     CLOSED,         // server closed region and updated meta
     SPLITTING,      // server started split of a region
@@ -210,30 +210,28 @@ public class RegionState {
     return serverName;
   }
 
+  /**
+   * PENDING_CLOSE (to be removed) is the same as CLOSING
+   */
   public boolean isClosing() {
-    return state == State.CLOSING;
+    return state == State.PENDING_CLOSE || state == State.CLOSING;
   }
 
   public boolean isClosed() {
     return state == State.CLOSED;
   }
 
-  public boolean isPendingClose() {
-    return state == State.PENDING_CLOSE;
-  }
-
+  /**
+   * PENDING_OPEN (to be removed) is the same as OPENING
+   */
   public boolean isOpening() {
-    return state == State.OPENING;
+    return state == State.PENDING_OPEN || state == State.OPENING;
   }
 
   public boolean isOpened() {
     return state == State.OPEN;
   }
 
-  public boolean isPendingOpen() {
-    return state == State.PENDING_OPEN;
-  }
-
   public boolean isOffline() {
     return state == State.OFFLINE;
   }
@@ -270,40 +268,6 @@ public class RegionState {
     return state == State.MERGING_NEW;
   }
 
-  public boolean isOpenOrMergingOnServer(final ServerName sn) {
-    return isOnServer(sn) && (isOpened() || isMerging());
-  }
-
-  public boolean isOpenOrMergingNewOnServer(final ServerName sn) {
-    return isOnServer(sn) && (isOpened() || isMergingNew());
-  }
-
-  public boolean isOpenOrSplittingOnServer(final ServerName sn) {
-    return isOnServer(sn) && (isOpened() || isSplitting());
-  }
-
-  public boolean isOpenOrSplittingNewOnServer(final ServerName sn) {
-    return isOnServer(sn) && (isOpened() || isSplittingNew());
-  }
-
-  public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
-    return isOnServer(sn) && isPendingOpenOrOpening();
-  }
-
-  // Failed open is also kind of pending open
-  public boolean isPendingOpenOrOpening() {
-    return isPendingOpen() || isOpening() || isFailedOpen();
-  }
-
-  public boolean isPendingCloseOrClosingOnServer(final ServerName sn) {
-    return isOnServer(sn) && isPendingCloseOrClosing();
-  }
-
-  // Failed close is also kind of pending close
-  public boolean isPendingCloseOrClosing() {
-    return isPendingClose() || isClosing() || isFailedClose();
-  }
-
   public boolean isOnServer(final ServerName sn) {
     return serverName != null && serverName.equals(sn);
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionAlreadyInTransitionException.java
----------------------------------------------------------------------
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionAlreadyInTransitionException.java
 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionAlreadyInTransitionException.java
deleted file mode 100644
index 6cad068..0000000
--- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionAlreadyInTransitionException.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.regionserver;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-
-import java.io.IOException;
-
-/**
- * This exception is thrown when a region server is asked to open or close
- * a region but it's already processing it
- */
-@SuppressWarnings("serial")
[email protected]
[email protected]
-public class RegionAlreadyInTransitionException extends IOException {
-
-  public RegionAlreadyInTransitionException(String s) {
-    super(s);
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignCallable.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignCallable.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignCallable.java
index 88e5c2a..e2e729d 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignCallable.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignCallable.java
@@ -34,18 +34,16 @@ public class AssignCallable implements Callable<Object> {
   private AssignmentManager assignmentManager;
 
   private HRegionInfo hri;
-  private boolean newPlan;
 
   public AssignCallable(
-      AssignmentManager assignmentManager, HRegionInfo hri, boolean newPlan) {
+      AssignmentManager assignmentManager, HRegionInfo hri) {
     this.assignmentManager = assignmentManager;
-    this.newPlan = newPlan;
     this.hri = hri;
   }
 
   @Override
   public Object call() throws Exception {
-    assignmentManager.assign(hri, newPlan);
+    assignmentManager.assign(hri);
     return null;
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 5ecbe98..5b9ff36 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -77,7 +77,7 @@ import 
org.apache.hadoop.hbase.master.handler.EnableTableHandler;
 import 
org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
 import 
org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
-import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
@@ -101,10 +101,6 @@ import com.google.common.annotations.VisibleForTesting;
 public class AssignmentManager {
   private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
 
-  static final String ALREADY_IN_TRANSITION_WAITTIME
-    = "hbase.assignment.already.intransition.waittime";
-  static final int DEFAULT_ALREADY_IN_TRANSITION_WAITTIME = 60000; // 1 minute
-
   protected final Server server;
 
   private ServerManager serverManager;
@@ -136,12 +132,6 @@ public class AssignmentManager {
   private final int maximumAttempts;
 
   /**
-   * Map of two merging regions from the region to be created.
-   */
-  private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
-    = new HashMap<String, PairOfSameType<HRegionInfo>>();
-
-  /**
    * The sleep time for which the assignment will wait before retrying in case 
of hbase:meta assignment
    * failure due to lack of availability of region plan
    */
@@ -751,22 +741,21 @@ public class AssignmentManager {
         try {
           // Send OPEN RPC. If it fails on a IOE or RemoteException,
           // regions will be assigned individually.
+          Configuration conf = server.getConfiguration();
           long maxWaitTime = System.currentTimeMillis() +
-            this.server.getConfiguration().
-              getLong("hbase.regionserver.rpc.startup.waittime", 60000);
+            conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
           for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
             try {
               List<RegionOpeningState> regionOpeningStateList = serverManager
                 .sendRegionOpen(destination, regionOpenInfos);
-              if (regionOpeningStateList == null) {
-                // Failed getting RPC connection to this server
-                return false;
-              }
               for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                 RegionOpeningState openingState = 
regionOpeningStateList.get(k);
                 if (openingState != RegionOpeningState.OPENED) {
                   HRegionInfo region = regionOpenInfos.get(k).getFirst();
+                  LOG.info("Got opening state " + openingState
+                    + ", will reassign later: " + region);
                   // Failed opening this region, reassign it later
+                  forceRegionStateToOffline(region, true);
                   failedToOpenRegions.add(region);
                 }
               }
@@ -782,8 +771,10 @@ public class AssignmentManager {
               } else if (e instanceof ServerNotRunningYetException) {
                 long now = System.currentTimeMillis();
                 if (now < maxWaitTime) {
-                  LOG.debug("Server is not yet up; waiting up to " +
-                    (maxWaitTime - now) + "ms", e);
+                  if (LOG.isDebugEnabled()) {
+                    LOG.debug("Server is not yet up; waiting up to " +
+                      (maxWaitTime - now) + "ms", e);
+                  }
                   Thread.sleep(100);
                   i--; // reset the try count
                   continue;
@@ -803,6 +794,17 @@ public class AssignmentManager {
                 Thread.sleep(100);
                 i--;
                 continue;
+              } else if (e instanceof FailedServerException && i < 
maximumAttempts) {
+                // In case the server is in the failed server list, no point to
+                // retry too soon. Retry after the failed_server_expiry time
+                long sleepTime = 1 + 
conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+                  RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug(destination + " is on failed server list; waiting "
+                    + sleepTime + "ms", e);
+                }
+                Thread.sleep(sleepTime);
+                continue;
               }
               throw e;
             }
@@ -811,6 +813,10 @@ public class AssignmentManager {
           // Can be a socket timeout, EOF, NoRouteToHost, etc
           LOG.info("Unable to communicate with " + destination
             + " in order to assign regions, ", e);
+          for (RegionState state: states) {
+            HRegionInfo region = state.getRegion();
+            forceRegionStateToOffline(region, true);
+          }
           return false;
         }
       } finally {
@@ -844,9 +850,7 @@ public class AssignmentManager {
    * on an unexpected server scenario, for an example)
    */
   private void unassign(final HRegionInfo region,
-      final RegionState state, final ServerName dest) {
-    ServerName server = state.getServerName();
-    long maxWaitTime = -1;
+      final ServerName server, final ServerName dest) {
     for (int i = 1; i <= this.maximumAttempts; i++) {
       if (this.server.isStopped() || this.server.isAborted()) {
         LOG.debug("Server stopped/aborted; skipping unassign of " + region);
@@ -873,7 +877,6 @@ public class AssignmentManager {
         if (t instanceof RemoteException) {
           t = ((RemoteException)t).unwrapRemoteException();
         }
-        boolean logRetries = true;
         if (t instanceof NotServingRegionException
             || t instanceof RegionServerStoppedException
             || t instanceof ServerNotRunningYetException) {
@@ -881,56 +884,34 @@ public class AssignmentManager {
             + ", it's not any more on " + server, t);
           regionStates.updateRegionState(region, State.OFFLINE);
           return;
-        } else if (t instanceof FailedServerException
-            || t instanceof RegionAlreadyInTransitionException) {
-          long sleepTime = 0;
-          Configuration conf = this.server.getConfiguration();
-          if(t instanceof FailedServerException) {
-            sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
-                  RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
-          } else {
-            if (maxWaitTime < 0) {
-              maxWaitTime =
-                  EnvironmentEdgeManager.currentTime()
-                      + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
-                        DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
-            }
-            long now = EnvironmentEdgeManager.currentTime();
-            if (now < maxWaitTime) {
-              LOG.debug("Region is already in transition; "
-                + "waiting up to " + (maxWaitTime - now) + "ms", t);
-              sleepTime = 100;
-              i--; // reset the try count
-              logRetries = false;
-            }
-          }
+        } else if (t instanceof FailedServerException && i < maximumAttempts) {
+          // In case the server is in the failed server list, no point to
+          // retry too soon. Retry after the failed_server_expiry time
           try {
-            if (sleepTime > 0) {
-              Thread.sleep(sleepTime);
+            Configuration conf = this.server.getConfiguration();
+            long sleepTime = 1 + 
conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+              RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug(server + " is on failed server list; waiting "
+                + sleepTime + "ms", t);
             }
+            Thread.sleep(sleepTime);
           } catch (InterruptedException ie) {
             LOG.warn("Failed to unassign "
               + region.getRegionNameAsString() + " since interrupted", ie);
+            regionStates.updateRegionState(region, State.FAILED_CLOSE);
             Thread.currentThread().interrupt();
-            if (state != null) {
-              regionStates.updateRegionState(region, State.FAILED_CLOSE);
-            }
             return;
           }
         }
 
-        if (logRetries) {
-          LOG.info("Server " + server + " returned " + t + " for "
-            + region.getRegionNameAsString() + ", try=" + i
-            + " of " + this.maximumAttempts, t);
-          // Presume retry or server will expire.
-        }
+        LOG.info("Server " + server + " returned " + t + " for "
+          + region.getRegionNameAsString() + ", try=" + i
+          + " of " + this.maximumAttempts, t);
       }
     }
     // Run out of attempts
-    if (state != null) {
-      regionStates.updateRegionState(region, State.FAILED_CLOSE);
-    }
+    regionStates.updateRegionState(region, State.FAILED_CLOSE);
   }
 
   /**
@@ -961,13 +942,13 @@ public class AssignmentManager {
       }
     case FAILED_CLOSE:
     case FAILED_OPEN:
-      unassign(region, state, null);
+      regionStates.updateRegionState(region, State.PENDING_CLOSE);
+      unassign(region, state.getServerName(), null);
       state = regionStates.getRegionState(region);
-      if (state.isFailedClose()) {
-        // If we can't close the region, we can't re-assign
-        // it so as to avoid possible double assignment/data loss.
-        LOG.info("Skip assigning " +
-          region + ", we couldn't close it: " + state);
+      if (!state.isOffline() && !state.isClosed()) {
+        // If the region isn't offline, we can't re-assign
+        // it now. It will be assigned automatically after
+        // the regionserver reports it's closed.
         return null;
       }
     case OFFLINE:
@@ -993,7 +974,6 @@ public class AssignmentManager {
       RegionPlan plan = null;
       long maxWaitTime = -1;
       HRegionInfo region = state.getRegion();
-      RegionOpeningState regionOpenState;
       Throwable previousException = null;
       for (int i = 1; i <= maximumAttempts; i++) {
         if (server.isStopped() || server.isAborted()) {
@@ -1023,25 +1003,25 @@ public class AssignmentManager {
           regionStates.updateRegionState(region, State.FAILED_OPEN);
           return;
         }
-            // In case of assignment from EnableTableHandler table state is 
ENABLING. Any how
-            // EnableTableHandler will set ENABLED after assigning all the 
table regions. If we
-            // try to set to ENABLED directly then client API may think table 
is enabled.
-            // When we have a case such as all the regions are added directly 
into hbase:meta and we call
-            // assignRegion then we need to make the table ENABLED. Hence in 
such case the table
-            // will not be in ENABLING or ENABLED state.
-            TableName tableName = region.getTable();
-            if (!tableStateManager.isTableState(tableName,
-              ZooKeeperProtos.Table.State.ENABLED, 
ZooKeeperProtos.Table.State.ENABLING)) {
-              LOG.debug("Setting table " + tableName + " to ENABLED state.");
-              setEnabledTable(tableName);
-            }
+        // In case of assignment from EnableTableHandler table state is 
ENABLING. Any how
+        // EnableTableHandler will set ENABLED after assigning all the table 
regions. If we
+        // try to set to ENABLED directly then client API may think table is 
enabled.
+        // When we have a case such as all the regions are added directly into 
hbase:meta and we call
+        // assignRegion then we need to make the table ENABLED. Hence in such 
case the table
+        // will not be in ENABLING or ENABLED state.
+        TableName tableName = region.getTable();
+        if (!tableStateManager.isTableState(tableName,
+          ZooKeeperProtos.Table.State.ENABLED, 
ZooKeeperProtos.Table.State.ENABLING)) {
+          LOG.debug("Setting table " + tableName + " to ENABLED state.");
+          setEnabledTable(tableName);
+        }
         LOG.info("Assigning " + region.getRegionNameAsString() +
             " to " + plan.getDestination().toString());
         // Transition RegionState to PENDING_OPEN
        regionStates.updateRegionState(region,
           State.PENDING_OPEN, plan.getDestination());
 
-        boolean needNewPlan;
+        boolean needNewPlan = false;
         final String assignMsg = "Failed assignment of " + 
region.getRegionNameAsString() +
             " to " + plan.getDestination();
         try {
@@ -1049,20 +1029,8 @@ public class AssignmentManager {
           if (this.shouldAssignRegionsWithFavoredNodes) {
             favoredNodes = 
((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
           }
-          regionOpenState = serverManager.sendRegionOpen(
-              plan.getDestination(), region, favoredNodes);
-
-          if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
-            // Failed opening this region, looping again on a new server.
-            needNewPlan = true;
-            LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
-                " trying to assign elsewhere instead; " +
-                "try=" + i + " of " + this.maximumAttempts);
-          } else {
-            // we're done
-            return;
-          }
-
+          serverManager.sendRegionOpen(plan.getDestination(), region, 
favoredNodes);
+          return; // we're done
         } catch (Throwable t) {
           if (t instanceof RemoteException) {
             t = ((RemoteException) t).unwrapRemoteException();
@@ -1070,44 +1038,34 @@ public class AssignmentManager {
           previousException = t;
 
           // Should we wait a little before retrying? If the server is 
starting it's yes.
-          // If the region is already in transition, it's yes as well: we want 
to be sure that
-          //  the region will get opened but we don't want a double assignment.
-          boolean hold = (t instanceof RegionAlreadyInTransitionException ||
-              t instanceof ServerNotRunningYetException);
+          boolean hold = (t instanceof ServerNotRunningYetException);
 
           // In case socket is timed out and the region server is still online,
           // the openRegion RPC could have been accepted by the server and
           // just the response didn't go through.  So we will retry to
-          // open the region on the same server to avoid possible
-          // double assignment.
+          // open the region on the same server.
           boolean retry = !hold && (t instanceof 
java.net.SocketTimeoutException
               && this.serverManager.isServerOnline(plan.getDestination()));
 
-
           if (hold) {
             LOG.warn(assignMsg + ", waiting a little before trying on the same 
region server " +
               "try=" + i + " of " + this.maximumAttempts, t);
 
             if (maxWaitTime < 0) {
-              if (t instanceof RegionAlreadyInTransitionException) {
-                maxWaitTime = EnvironmentEdgeManager.currentTime()
-                  + 
this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
-                    DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
-              } else {
-                maxWaitTime = EnvironmentEdgeManager.currentTime()
-                  + this.server.getConfiguration().getLong(
-                    "hbase.regionserver.rpc.startup.waittime", 60000);
-              }
+              maxWaitTime = EnvironmentEdgeManager.currentTime()
+                + this.server.getConfiguration().getLong(
+                  "hbase.regionserver.rpc.startup.waittime", 60000);
             }
             try {
-              needNewPlan = false;
               long now = EnvironmentEdgeManager.currentTime();
               if (now < maxWaitTime) {
-                LOG.debug("Server is not yet up or region is already in 
transition; "
-                  + "waiting up to " + (maxWaitTime - now) + "ms", t);
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug("Server is not yet up; waiting up to "
+                    + (maxWaitTime - now) + "ms", t);
+                }
                 Thread.sleep(100);
                 i--; // reset the try count
-              } else if (!(t instanceof RegionAlreadyInTransitionException)) {
+              } else {
                 LOG.debug("Server is not up for a while; try a new one", t);
                 needNewPlan = true;
               }
@@ -1119,9 +1077,10 @@ public class AssignmentManager {
               return;
             }
           } else if (retry) {
-            needNewPlan = false;
             i--; // we want to retry as many times as needed as long as the RS 
is not dead.
-            LOG.warn(assignMsg + ", trying to assign to the same region server 
due ", t);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug(assignMsg + ", trying to assign to the same region 
server due ", t);
+            }
           } else {
             needNewPlan = true;
             LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
@@ -1200,29 +1159,17 @@ public class AssignmentManager {
 
   /**
    * @param region the region to assign
-   * @return Plan for passed <code>region</code> (If none currently, it 
creates one or
-   * if no servers to assign, it returns null).
-   */
-  private RegionPlan getRegionPlan(final HRegionInfo region,
-      final boolean forceNewPlan)  throws HBaseIOException {
-    return getRegionPlan(region, null, forceNewPlan);
-  }
-
-  /**
-   * @param region the region to assign
-   * @param serverToExclude Server to exclude (we know its bad). Pass null if
-   * all servers are thought to be assignable.
    * @param forceNewPlan If true, then if an existing plan exists, a new plan
    * will be generated.
    * @return Plan for passed <code>region</code> (If none currently, it 
creates one or
    * if no servers to assign, it returns null).
    */
   private RegionPlan getRegionPlan(final HRegionInfo region,
-      final ServerName serverToExclude, final boolean forceNewPlan) throws 
HBaseIOException {
+      final boolean forceNewPlan) throws HBaseIOException {
     // Pickup existing plan or make a new one
     final String encodedName = region.getEncodedName();
     final List<ServerName> destServers =
-      serverManager.createDestinationServersList(serverToExclude);
+      serverManager.createDestinationServersList();
 
     if (destServers.isEmpty()){
       LOG.warn("Can't move " + encodedName +
@@ -1268,15 +1215,19 @@ public class AssignmentManager {
         LOG.warn("Can't find a destination for " + encodedName);
         return null;
       }
-      LOG.debug("No previous transition plan found (or ignoring " +
-        "an existing plan) for " + region.getRegionNameAsString() +
-        "; generated random plan=" + randomPlan + "; " + destServers.size() +
-        " (online=" + serverManager.getOnlineServers().size() +
-        ") available servers, forceNewPlan=" + forceNewPlan);
-        return randomPlan;
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("No previous transition plan found (or ignoring " +
+          "an existing plan) for " + region.getRegionNameAsString() +
+          "; generated random plan=" + randomPlan + "; " + destServers.size() +
+          " (online=" + serverManager.getOnlineServers().size() +
+          ") available servers, forceNewPlan=" + forceNewPlan);
       }
-    LOG.debug("Using pre-existing plan for " +
-      region.getRegionNameAsString() + "; plan=" + existingPlan);
+      return randomPlan;
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Using pre-existing plan for " +
+        region.getRegionNameAsString() + "; plan=" + existingPlan);
+    }
     return existingPlan;
   }
 
@@ -1356,7 +1307,7 @@ public class AssignmentManager {
         return;
       }
 
-      unassign(region, state, dest);
+      unassign(region, state.getServerName(), dest);
     } finally {
       lock.unlock();
 
@@ -1481,7 +1432,7 @@ public class AssignmentManager {
           " region(s) to " + servers + " server(s)");
       }
       for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) 
{
-        if (!assign(plan.getKey(), plan.getValue())) {
+        if (!assign(plan.getKey(), plan.getValue()) && !server.isStopped()) {
           for (HRegionInfo region: plan.getValue()) {
             if (!regionStates.isRegionOnline(region)) {
               invokeAssign(region);
@@ -1727,6 +1678,9 @@ public class AssignmentManager {
       State state = regionState.getState();
       LOG.info("Processing " + regionState);
       switch (state) {
+      case CLOSED:
+        invokeAssign(regionState.getRegion());
+        break;
       case PENDING_OPEN:
         retrySendRegionOpen(regionState);
         break;
@@ -1755,41 +1709,55 @@ public class AssignmentManager {
             if (!regionState.equals(regionStates.getRegionState(hri))) {
               return; // Region is not in the expected state any more
             }
-            while (serverManager.isServerOnline(serverName)
-                && !server.isStopped() && !server.isAborted()) {
+            for (int i = 1; i <= maximumAttempts; i++) {
+              if (!serverManager.isServerOnline(serverName)
+                  || server.isStopped() || server.isAborted()) {
+                return; // No need any more
+              }
               try {
                 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                 if (shouldAssignRegionsWithFavoredNodes) {
                   favoredNodes = 
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(hri);
                 }
-                RegionOpeningState regionOpenState = 
serverManager.sendRegionOpen(
-                  serverName, hri, favoredNodes);
-
-                if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
-                  // Failed opening this region, this means the target server 
didn't get
-                  // the original region open RPC, so re-assign it with a new 
plan
-                  LOG.debug("Got failed_opening in retry sendRegionOpen for "
-                    + regionState + ", re-assign it");
-                  invokeAssign(hri, true);
-                }
-                return; // Done.
+                serverManager.sendRegionOpen(serverName, hri, favoredNodes);
+                return; // we're done
               } catch (Throwable t) {
                 if (t instanceof RemoteException) {
                   t = ((RemoteException) t).unwrapRemoteException();
                 }
-                // In case SocketTimeoutException/FailedServerException, retry
-                if (t instanceof java.net.SocketTimeoutException
-                    || t instanceof FailedServerException) {
-                  Threads.sleep(100);
-                  continue;
+                if (t instanceof FailedServerException && i < maximumAttempts) 
{
+                  // In case the server is in the failed server list, no point 
to
+                  // retry too soon. Retry after the failed_server_expiry time
+                  try {
+                    Configuration conf = this.server.getConfiguration();
+                    long sleepTime = 1 + 
conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+                      RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+                    if (LOG.isDebugEnabled()) {
+                      LOG.debug(serverName + " is on failed server list; 
waiting "
+                        + sleepTime + "ms", t);
+                    }
+                    Thread.sleep(sleepTime);
+                    continue;
+                  } catch (InterruptedException ie) {
+                    LOG.warn("Failed to assign "
+                      + hri.getRegionNameAsString() + " since interrupted", 
ie);
+                    regionStates.updateRegionState(hri, State.FAILED_OPEN);
+                    Thread.currentThread().interrupt();
+                    return;
+                  }
                 }
-                // For other exceptions, re-assign it
-                LOG.debug("Got exception in retry sendRegionOpen for "
-                  + regionState + ", re-assign it", t);
-                invokeAssign(hri);
-                return; // Done.
+                if (serverManager.isServerOnline(serverName)
+                    && t instanceof java.net.SocketTimeoutException) {
+                  i--; // reset the try count
+                } else {
+                  LOG.info("Got exception in retrying sendRegionOpen for "
+                    + regionState + "; try=" + i + " of " + maximumAttempts, 
t);
+                }
+                Threads.sleep(100);
               }
             }
+            // Run out of attempts
+            regionStates.updateRegionState(hri, State.FAILED_OPEN);
           } finally {
             lock.unlock();
           }
@@ -1813,38 +1781,51 @@ public class AssignmentManager {
             if (!regionState.equals(regionStates.getRegionState(hri))) {
               return; // Region is not in the expected state any more
             }
-            while (serverManager.isServerOnline(serverName)
-                && !server.isStopped() && !server.isAborted()) {
+            for (int i = 1; i <= maximumAttempts; i++) {
+              if (!serverManager.isServerOnline(serverName)
+                  || server.isStopped() || server.isAborted()) {
+                return; // No need any more
+              }
               try {
-                if (!serverManager.sendRegionClose(serverName, hri, null)) {
-                  // This means the region is still on the target server
-                  LOG.debug("Got false in retry sendRegionClose for "
-                    + regionState + ", re-close it");
-                  invokeUnAssign(hri);
-                }
+                serverManager.sendRegionClose(serverName, hri, null);
                 return; // Done.
               } catch (Throwable t) {
                 if (t instanceof RemoteException) {
                   t = ((RemoteException) t).unwrapRemoteException();
                 }
-                // In case SocketTimeoutException/FailedServerException, retry
-                if (t instanceof java.net.SocketTimeoutException
-                    || t instanceof FailedServerException) {
-                  Threads.sleep(100);
-                  continue;
+                if (t instanceof FailedServerException && i < maximumAttempts) 
{
+                  // In case the server is in the failed server list, no point 
to
+                  // retry too soon. Retry after the failed_server_expiry time
+                  try {
+                    Configuration conf = this.server.getConfiguration();
+                    long sleepTime = 1 + 
conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+                      RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+                    if (LOG.isDebugEnabled()) {
+                      LOG.debug(serverName + " is on failed server list; 
waiting "
+                        + sleepTime + "ms", t);
+                    }
+                    Thread.sleep(sleepTime);
+                    continue;
+                  } catch (InterruptedException ie) {
+                    LOG.warn("Failed to unassign "
+                      + hri.getRegionNameAsString() + " since interrupted", 
ie);
+                    regionStates.updateRegionState(hri, State.FAILED_CLOSE);
+                    Thread.currentThread().interrupt();
+                    return;
+                  }
                 }
-                if (!(t instanceof NotServingRegionException
-                    || t instanceof RegionAlreadyInTransitionException)) {
-                  // 
NotServingRegionException/RegionAlreadyInTransitionException
-                  // means the target server got the original region close 
request.
-                  // For other exceptions, re-close it
-                  LOG.debug("Got exception in retry sendRegionClose for "
-                    + regionState + ", re-close it", t);
-                  invokeUnAssign(hri);
+                if (serverManager.isServerOnline(serverName)
+                    && t instanceof java.net.SocketTimeoutException) {
+                  i--; // reset the try count
+                } else {
+                  LOG.info("Got exception in retrying sendRegionClose for "
+                    + regionState + "; try=" + i + " of " + maximumAttempts, 
t);
                 }
-                return; // Done.
+                Threads.sleep(100);
               }
             }
+            // Run out of attempts
+            regionStates.updateRegionState(hri, State.FAILED_CLOSE);
           } finally {
             lock.unlock();
           }
@@ -1886,7 +1867,7 @@ public class AssignmentManager {
   /**
    * @param region Region whose plan we are to clear.
    */
-  void clearRegionPlan(final HRegionInfo region) {
+  private void clearRegionPlan(final HRegionInfo region) {
     synchronized (this.regionPlans) {
       this.regionPlans.remove(region.getEncodedName());
     }
@@ -1933,11 +1914,7 @@ public class AssignmentManager {
   }
 
   void invokeAssign(HRegionInfo regionInfo) {
-    invokeAssign(regionInfo, true);
-  }
-
-  void invokeAssign(HRegionInfo regionInfo, boolean newPlan) {
-    threadPoolExecutorService.submit(new AssignCallable(this, regionInfo, 
newPlan));
+    threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
   }
 
   void invokeUnAssign(HRegionInfo regionInfo) {
@@ -1991,8 +1968,8 @@ public class AssignmentManager {
         }
       }
     }
-    List<HRegionInfo> regions = regionStates.serverOffline(sn);
-    for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
+    List<HRegionInfo> rits = regionStates.serverOffline(sn);
+    for (Iterator<HRegionInfo> it = rits.iterator(); it.hasNext(); ) {
       HRegionInfo hri = it.next();
       String encodedName = hri.getEncodedName();
 
@@ -2003,8 +1980,8 @@ public class AssignmentManager {
           regionStates.getRegionTransitionState(encodedName);
         if (regionState == null
             || (regionState.getServerName() != null && 
!regionState.isOnServer(sn))
-            || !(regionState.isFailedClose() || regionState.isOffline()
-              || regionState.isPendingOpenOrOpening())) {
+            || !RegionStates.isOneOfStates(regionState, State.PENDING_OPEN,
+                State.OPENING, State.FAILED_OPEN, State.FAILED_CLOSE, 
State.OFFLINE)) {
           LOG.info("Skip " + regionState + " since it is not 
opening/failed_close"
             + " on the dead server any more: " + sn);
           it.remove();
@@ -2022,7 +1999,7 @@ public class AssignmentManager {
         lock.unlock();
       }
     }
-    return regions;
+    return rits;
   }
 
   /**
@@ -2076,8 +2053,20 @@ public class AssignmentManager {
     }
   }
 
-  private void onRegionFailedOpen(
-      final HRegionInfo hri, final ServerName sn) {
+  private String onRegionFailedOpen(final RegionState current,
+      final HRegionInfo hri, final ServerName serverName) {
+    // The region must be opening on this server.
+    // If current state is failed_open on the same server,
+    // it could be a reportRegionTransition RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isOpening() || current.isFailedOpen())) {
+      return hri.getShortNameToLog() + " is not opening on " + serverName;
+    }
+
+    if (current.isFailedOpen()) {
+      return null;
+    }
+
     String encodedName = hri.getEncodedName();
     AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
     if (failedOpenCount == null) {
@@ -2098,166 +2087,386 @@ public class AssignmentManager {
       if (regionState != null) {
         // When there are more than one region server a new RS is selected as 
the
         // destination and the same is updated in the region plan. (HBASE-5546)
-        if (getTableStateManager().isTableState(hri.getTable(),
-            ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING) ||
-            replicasToClose.contains(hri)) {
+        if (getTableStateManager().isTableState(hri.getTable(), 
Table.State.DISABLED,
+            Table.State.DISABLING) || replicasToClose.contains(hri)) {
           offlineDisabledRegion(hri);
-          return;
+          return null;
         }
-        // ZK Node is in CLOSED state, assign it.
-         regionStates.updateRegionState(hri, RegionState.State.CLOSED);
+        regionStates.updateRegionState(hri, RegionState.State.CLOSED);
         // This below has to do w/ online enable/disable of a table
         removeClosedRegion(hri);
         try {
-          getRegionPlan(hri, sn, true);
+          getRegionPlan(hri, true);
         } catch (HBaseIOException e) {
           LOG.warn("Failed to get region plan", e);
         }
-        invokeAssign(hri, false);
+        invokeAssign(hri);
       }
     }
+    // Null means no error
+    return null;
   }
 
-  private void onRegionOpen(
-      final HRegionInfo hri, final ServerName sn, long openSeqNum) {
-    regionOnline(hri, sn, openSeqNum);
+  private String onRegionOpen(final RegionState current, final HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be opening on this server.
+    // If current state is already opened on the same server,
+    // it could be a reportRegionTransition RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isOpening() || current.isOpened())) {
+      return hri.getShortNameToLog() + " is not opening on " + serverName;
+    }
+
+    if (current.isOpened()) {
+      return null;
+    }
+
+    long openSeqNum = transition.hasOpenSeqNum()
+      ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM;
+    if (openSeqNum < 0) {
+      return "Newly opened region has invalid open seq num " + openSeqNum;
+    }
+    regionOnline(hri, serverName, openSeqNum);
 
     // reset the count, if any
     failedOpenTracker.remove(hri.getEncodedName());
     if (getTableStateManager().isTableState(hri.getTable(),
-        ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING)) {
+        Table.State.DISABLED, Table.State.DISABLING)) {
       invokeUnAssign(hri);
     }
+    return null;
   }
 
-  private void onRegionClosed(final HRegionInfo hri) {
-    if (getTableStateManager().isTableState(hri.getTable(),
-        ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING) ||
-        replicasToClose.contains(hri)) {
+  private String onRegionClosed(final RegionState current,
+      final HRegionInfo hri, final ServerName serverName) {
+    // We didn't check if the region is already closed/offline on the server
+    // as we did for other transitions to handle reportRegionTransition RPC 
retry.
+    // There are two reasons. 1. Closed/offline states are transient. Region 
will be
+    // usually assigned right after closed. When a RPC retry comes in, the 
region may
+    // already have moved away from closed state. 2. On the region server 
side, we
+    // don't care much about the response for this transition. We only make 
sure
+    // master has got and processed this report, either successfully or not.
+    if (current == null || !current.isOnServer(serverName) || 
!current.isClosing()) {
+      return hri.getShortNameToLog() + " is not closing on " + serverName;
+    }
+    if (getTableStateManager().isTableState(hri.getTable(), 
Table.State.DISABLED,
+        Table.State.DISABLING) || replicasToClose.contains(hri)) {
       offlineDisabledRegion(hri);
-      return;
+      return null;
     }
+
     regionStates.updateRegionState(hri, RegionState.State.CLOSED);
     sendRegionClosedNotification(hri);
     // This below has to do w/ online enable/disable of a table
     removeClosedRegion(hri);
-    invokeAssign(hri, false);
+    invokeAssign(hri);
+    return null;
   }
 
-  private String onRegionSplit(ServerName sn, TransitionCode code,
-      final HRegionInfo p, final HRegionInfo a, final HRegionInfo b) {
-    final RegionState rs_p = regionStates.getRegionState(p);
+  private String onRegionReadyToSplit(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be opened on this server.
+    // If current state is already splitting on the same server,
+    // it could be a reportRegionTransition RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isOpened() || current.isSplitting())) {
+      return hri.getShortNameToLog() + " is not opening on " + serverName;
+    }
+
+    if (current.isSplitting()) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
     RegionState rs_a = regionStates.getRegionState(a);
     RegionState rs_b = regionStates.getRegionState(b);
-    if (!(rs_p.isOpenOrSplittingOnServer(sn)
-        && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
-        && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
-      return "Not in state good for split";
+    if (rs_a != null || rs_b != null) {
+      return "Some daughter is already existing. "
+        + "a=" + rs_a + ", b=" + rs_b;
+    }
+
+    // Server holding is not updated at this stage.
+    // It is done after PONR.
+    regionStates.updateRegionState(hri, State.SPLITTING);
+    regionStates.createRegionState(
+      a, State.SPLITTING_NEW, serverName, null);
+    regionStates.createRegionState(
+      b, State.SPLITTING_NEW, serverName, null);
+    return null;
+  }
+
+  private String onRegionSplitPONR(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be splitting on this server, and the daughters must be 
in
+    // splitting_new state. To check RPC retry, we use server holding info.
+    if (current == null || !current.isOnServer(serverName) || 
!current.isSplitting()) {
+      return hri.getShortNameToLog() + " is not splitting on " + serverName;
     }
 
-    regionStates.updateRegionState(a, State.SPLITTING_NEW, sn);
-    regionStates.updateRegionState(b, State.SPLITTING_NEW, sn);
-    regionStates.updateRegionState(p, State.SPLITTING);
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    RegionState rs_a = regionStates.getRegionState(a);
+    RegionState rs_b = regionStates.getRegionState(b);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isSplittingNew()
+        || !rs_b.isSplittingNew()) {
+      return "Some daughter is not known to be splitting on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
 
-    if (code == TransitionCode.SPLIT) {
-      if (TEST_SKIP_SPLIT_HANDLING) {
-        return "Skipping split message, TEST_SKIP_SPLIT_HANDLING is set";
-      }
-      regionOffline(p, State.SPLIT);
-      regionOnline(a, sn, 1);
-      regionOnline(b, sn, 1);
-
-      // User could disable the table before master knows the new region.
-      if (getTableStateManager().isTableState(p.getTable(),
-          ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING)) {
-        invokeUnAssign(a);
-        invokeUnAssign(b);
-      } else {
-        Callable<Object> splitReplicasCallable = new Callable<Object>() {
-          @Override
-          public Object call() {
-            doSplittingOfReplicas(p, a, b);
-            return null;
-          }
-        };
-        threadPoolExecutorService.submit(splitReplicasCallable);
-      }
-    } else if (code == TransitionCode.SPLIT_PONR) {
-      try {
-        regionStates.splitRegion(p, a, b, sn);
-      } catch (IOException ioe) {
-        LOG.info("Failed to record split region " + p.getShortNameToLog());
-        return "Failed to record the splitting in meta";
-      }
-    } else if (code == TransitionCode.SPLIT_REVERTED) {
-      regionOnline(p, sn);
-      regionOffline(a);
-      regionOffline(b);
-
-      if (getTableStateManager().isTableState(p.getTable(),
-          ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING)) {
-        invokeUnAssign(p);
-      }
+    if (!regionStates.isRegionOnServer(hri, serverName)) {
+      return null;
+    }
+
+    try {
+      regionStates.splitRegion(hri, a, b, serverName);
+    } catch (IOException ioe) {
+      LOG.info("Failed to record split region " + hri.getShortNameToLog());
+      return "Failed to record the splitting in meta";
     }
     return null;
   }
 
-  private String onRegionMerge(ServerName sn, TransitionCode code,
-      final HRegionInfo p, final HRegionInfo a, final HRegionInfo b) {
-    RegionState rs_p = regionStates.getRegionState(p);
+  private String onRegionSplit(final RegionState current, final HRegionInfo 
hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be splitting on this server, and the daughters must be 
in
+    // splitting_new state.
+    // If current state is already split on the same server,
+    // it could be a reportRegionTransition RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isSplitting() || current.isSplit())) {
+      return hri.getShortNameToLog() + " is not splitting on " + serverName;
+    }
+
+    if (current.isSplit()) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
     RegionState rs_a = regionStates.getRegionState(a);
     RegionState rs_b = regionStates.getRegionState(b);
-    if (!(rs_a.isOpenOrMergingOnServer(sn) && rs_b.isOpenOrMergingOnServer(sn)
-        && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
-      return "Not in state good for merge";
-    }
-
-    regionStates.updateRegionState(a, State.MERGING);
-    regionStates.updateRegionState(b, State.MERGING);
-    regionStates.updateRegionState(p, State.MERGING_NEW, sn);
-
-    String encodedName = p.getEncodedName();
-    if (code == TransitionCode.READY_TO_MERGE) {
-      mergingRegions.put(encodedName,
-        new PairOfSameType<HRegionInfo>(a, b));
-    } else if (code == TransitionCode.MERGED) {
-      mergingRegions.remove(encodedName);
-      regionOffline(a, State.MERGED);
-      regionOffline(b, State.MERGED);
-      regionOnline(p, sn, 1);
-
-      // User could disable the table before master knows the new region.
-      if (getTableStateManager().isTableState(p.getTable(),
-          ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING)) {
-        invokeUnAssign(p);
-      } else {
-        Callable<Object> mergeReplicasCallable = new Callable<Object>() {
-          @Override
-          public Object call() {
-            doMergingOfReplicas(p, a, b);
-            return null;
-          }
-        };
-        threadPoolExecutorService.submit(mergeReplicasCallable);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isSplittingNew()
+        || !rs_b.isSplittingNew()) {
+      return "Some daughter is not known to be splitting on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
+
+    if (TEST_SKIP_SPLIT_HANDLING) {
+      return "Skipping split message, TEST_SKIP_SPLIT_HANDLING is set";
+    }
+    regionOffline(hri, State.SPLIT);
+    regionOnline(a, serverName, 1);
+    regionOnline(b, serverName, 1);
+
+    // User could disable the table before master knows the new region.
+    if (getTableStateManager().isTableState(hri.getTable(),
+        Table.State.DISABLED, Table.State.DISABLING)) {
+      invokeUnAssign(a);
+      invokeUnAssign(b);
+    } else {
+      Callable<Object> splitReplicasCallable = new Callable<Object>() {
+        @Override
+        public Object call() {
+          doSplittingOfReplicas(hri, a, b);
+          return null;
+        }
+      };
+      threadPoolExecutorService.submit(splitReplicasCallable);
+    }
+    return null;
+  }
+
+  private String onRegionSplitReverted(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be splitting on this server, and the daughters must be 
in
+    // splitting_new state.
+    // If the region is in open state, it could be an RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isSplitting() || current.isOpened())) {
+      return hri.getShortNameToLog() + " is not splitting on " + serverName;
+    }
+
+    if (current.isOpened()) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    RegionState rs_a = regionStates.getRegionState(a);
+    RegionState rs_b = regionStates.getRegionState(b);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isSplittingNew()
+        || !rs_b.isSplittingNew()) {
+      return "Some daughter is not known to be splitting on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
+
+    regionOnline(hri, serverName);
+    regionOffline(a);
+    regionOffline(b);
+    if (getTableStateManager().isTableState(hri.getTable(),
+        Table.State.DISABLED, Table.State.DISABLING)) {
+      invokeUnAssign(hri);
+    }
+    return null;
+  }
+
+  private String onRegionReadyToMerge(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be new, and the daughters must be open on this server.
+    // If the region is in merge_new state, it could be an RPC retry.
+    if (current != null && (!current.isOnServer(serverName)
+        || !current.isMergingNew())) {
+      return "Merging daughter region already exists, p=" + current;
+    }
+
+    if (current != null) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    Set<String> encodedNames = new HashSet<String>(2);
+    encodedNames.add(a.getEncodedName());
+    encodedNames.add(b.getEncodedName());
+    Map<String, Lock> locks = locker.acquireLocks(encodedNames);
+    try {
+      RegionState rs_a = regionStates.getRegionState(a);
+      RegionState rs_b = regionStates.getRegionState(b);
+      if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+          || !rs_b.isOnServer(serverName) || !rs_a.isOpened()
+          || !rs_b.isOpened()) {
+        return "Some daughter is not in a state to merge on " + serverName
+          + ", a=" + rs_a + ", b=" + rs_b;
       }
-    } else if (code == TransitionCode.MERGE_PONR) {
-      try {
-        regionStates.mergeRegions(p, a, b, sn);
-      } catch (IOException ioe) {
-        LOG.info("Failed to record merged region " + p.getShortNameToLog());
-        return "Failed to record the merging in meta";
+
+      regionStates.updateRegionState(a, State.MERGING);
+      regionStates.updateRegionState(b, State.MERGING);
+      regionStates.createRegionState(
+        hri, State.MERGING_NEW, serverName, null);
+      return null;
+    } finally {
+      for (Lock lock: locks.values()) {
+        lock.unlock();
       }
+    }
+  }
+
+  private String onRegionMergePONR(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be in merging_new state, and the daughters must be
+    // merging. To check RPC retry, we use server holding info.
+    if (current == null || !current.isOnServer(serverName) || 
!current.isMergingNew()) {
+      return hri.getShortNameToLog() + " is not merging on " + serverName;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    RegionState rs_a = regionStates.getRegionState(a);
+    RegionState rs_b = regionStates.getRegionState(b);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isMerging()
+        || !rs_b.isMerging()) {
+      return "Some daughter is not known to be merging on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
+
+    if (regionStates.isRegionOnServer(hri, serverName)) {
+      return null;
+    }
+
+    try {
+      regionStates.mergeRegions(hri, a, b, serverName);
+    } catch (IOException ioe) {
+      LOG.info("Failed to record merged region " + hri.getShortNameToLog());
+      return "Failed to record the merging in meta";
+    }
+    return null;
+  }
+
+  private String onRegionMerged(final RegionState current, final HRegionInfo 
hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be in merging_new state, and the daughters must be
+    // merging on this server.
+    // If current state is already opened on the same server,
+    // it could be a reportRegionTransition RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isMergingNew() || current.isOpened())) {
+      return hri.getShortNameToLog() + " is not merging on " + serverName;
+    }
+
+    if (current.isOpened()) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    RegionState rs_a = regionStates.getRegionState(a);
+    RegionState rs_b = regionStates.getRegionState(b);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isMerging()
+        || !rs_b.isMerging()) {
+      return "Some daughter is not known to be merging on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
+
+    regionOffline(a, State.MERGED);
+    regionOffline(b, State.MERGED);
+    regionOnline(hri, serverName, 1);
+
+    // User could disable the table before master knows the new region.
+    if (getTableStateManager().isTableState(hri.getTable(),
+        Table.State.DISABLED, Table.State.DISABLING)) {
+      invokeUnAssign(hri);
     } else {
-      mergingRegions.remove(encodedName);
-      regionOnline(a, sn);
-      regionOnline(b, sn);
-      regionOffline(p);
-
-      if (getTableStateManager().isTableState(p.getTable(),
-          ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING)) {
-        invokeUnAssign(a);
-        invokeUnAssign(b);
-      }
+      Callable<Object> mergeReplicasCallable = new Callable<Object>() {
+        @Override
+        public Object call() {
+          doMergingOfReplicas(hri, a, b);
+          return null;
+        }
+      };
+      threadPoolExecutorService.submit(mergeReplicasCallable);
+    }
+    return null;
+  }
+
+  private String onRegionMergeReverted(final RegionState current, final 
HRegionInfo hri,
+      final ServerName serverName, final RegionStateTransition transition) {
+    // The region must be in merging_new state, and the daughters must be
+    // merging on this server.
+    // If the region is in offline state, it could be an RPC retry.
+    if (current == null || !current.isOnServer(serverName)
+        || !(current.isMergingNew() || current.isOffline())) {
+      return hri.getShortNameToLog() + " is not merging on " + serverName;
+    }
+
+    if (current.isOffline()) {
+      return null;
+    }
+
+    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
+    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
+    RegionState rs_a = regionStates.getRegionState(a);
+    RegionState rs_b = regionStates.getRegionState(b);
+    if (rs_a == null || rs_b == null || !rs_a.isOnServer(serverName)
+        || !rs_b.isOnServer(serverName) || !rs_a.isMerging()
+        || !rs_b.isMerging()) {
+      return "Some daughter is not known to be merging on " + serverName
+        + ", a=" + rs_a + ", b=" + rs_b;
+    }
+
+    regionOnline(a, serverName);
+    regionOnline(b, serverName);
+    regionOffline(hri);
+
+    if (getTableStateManager().isTableState(hri.getTable(),
+        Table.State.DISABLED, Table.State.DISABLING)) {
+      invokeUnAssign(a);
+      invokeUnAssign(b);
     }
     return null;
   }
@@ -2466,76 +2675,61 @@ public class AssignmentManager {
       final RegionStateTransition transition) {
     TransitionCode code = transition.getTransitionCode();
     HRegionInfo hri = HRegionInfo.convert(transition.getRegionInfo(0));
-    RegionState current = regionStates.getRegionState(hri);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Got transition " + code + " for "
-        + (current != null ? current.toString() : hri.getShortNameToLog())
-        + " from " + serverName);
-    }
-    String errorMsg = null;
-    switch (code) {
-    case OPENED:
-      if (current != null && current.isOpened() && 
current.isOnServer(serverName)) {
-        LOG.info("Region " + hri.getShortNameToLog() + " is already " + 
current.getState() + " on "
-            + serverName);
-        break;
-      }
-    case FAILED_OPEN:
-      if (current == null
-          || !current.isPendingOpenOrOpeningOnServer(serverName)) {
-        errorMsg = hri.getShortNameToLog()
-          + " is not pending open on " + serverName;
-      } else if (code == TransitionCode.FAILED_OPEN) {
-        onRegionFailedOpen(hri, serverName);
-      } else {
-        long openSeqNum = HConstants.NO_SEQNUM;
-        if (transition.hasOpenSeqNum()) {
-          openSeqNum = transition.getOpenSeqNum();
-        }
-        if (openSeqNum < 0) {
-          errorMsg = "Newly opened region has invalid open seq num " + 
openSeqNum;
-        } else {
-          onRegionOpen(hri, serverName, openSeqNum);
-        }
+    Lock lock = locker.acquireLock(hri.getEncodedName());
+    try {
+      RegionState current = regionStates.getRegionState(hri);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Got transition " + code + " for "
+          + (current != null ? current.toString() : hri.getShortNameToLog())
+          + " from " + serverName);
       }
-      break;
+      String errorMsg = null;
+      switch (code) {
+      case OPENED:
+        errorMsg = onRegionOpen(current, hri, serverName, transition);
+        break;
+      case FAILED_OPEN:
+        errorMsg = onRegionFailedOpen(current, hri, serverName);
+        break;
+      case CLOSED:
+        errorMsg = onRegionClosed(current, hri, serverName);
+        break;
+      case READY_TO_SPLIT:
+        errorMsg = onRegionReadyToSplit(current, hri, serverName, transition);
+        break;
+      case SPLIT_PONR:
+        errorMsg = onRegionSplitPONR(current, hri, serverName, transition);
+        break;
+      case SPLIT:
+        errorMsg = onRegionSplit(current, hri, serverName, transition);
+        break;
+      case SPLIT_REVERTED:
+        errorMsg = onRegionSplitReverted(current, hri, serverName, transition);
+        break;
+      case READY_TO_MERGE:
+        errorMsg = onRegionReadyToMerge(current, hri, serverName, transition);
+        break;
+      case MERGE_PONR:
+        errorMsg = onRegionMergePONR(current, hri, serverName, transition);
+        break;
+      case MERGED:
+        errorMsg = onRegionMerged(current, hri, serverName, transition);
+        break;
+      case MERGE_REVERTED:
+        errorMsg = onRegionMergeReverted(current, hri, serverName, transition);
+        break;
 
-    case CLOSED:
-      if (current == null
-          || !current.isPendingCloseOrClosingOnServer(serverName)) {
-        errorMsg = hri.getShortNameToLog()
-          + " is not pending close on " + serverName;
-      } else {
-        onRegionClosed(hri);
+      default:
+        errorMsg = "Unexpected transition code " + code;
       }
-      break;
-
-    case READY_TO_SPLIT:
-    case SPLIT_PONR:
-    case SPLIT:
-    case SPLIT_REVERTED:
-      errorMsg = onRegionSplit(serverName, code, hri,
-        HRegionInfo.convert(transition.getRegionInfo(1)),
-        HRegionInfo.convert(transition.getRegionInfo(2)));
-      break;
-
-    case READY_TO_MERGE:
-    case MERGE_PONR:
-    case MERGED:
-    case MERGE_REVERTED:
-      errorMsg = onRegionMerge(serverName, code, hri,
-        HRegionInfo.convert(transition.getRegionInfo(1)),
-        HRegionInfo.convert(transition.getRegionInfo(2)));
-      break;
-
-    default:
-      errorMsg = "Unexpected transition code " + code;
-    }
-    if (errorMsg != null) {
-      LOG.error("Failed to transition region from " + current + " to "
-        + code + " by " + serverName + ": " + errorMsg);
+      if (errorMsg != null) {
+        LOG.error("Failed to transition region from " + current + " on "
+          + code + " by " + serverName + ": " + errorMsg);
+      }
+      return errorMsg;
+    } finally {
+      lock.unlock();
     }
-    return errorMsg;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 60296fa..6a0882c 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -561,7 +561,8 @@ public class RegionStates {
         // Offline state is also kind of pending open if the region is in
         // transition. The region could be in failed_close state too if we have
         // tried several times to open it while this region server is not 
reachable)
-        if (state.isPendingOpenOrOpening() || state.isFailedClose() || 
state.isOffline()) {
+        if (isOneOfStates(state, State.OPENING, State.PENDING_OPEN,
+            State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
           LOG.info("Found region in " + state + " to be reassigned by SSH for 
" + sn);
           rits.add(hri);
         } else {
@@ -724,6 +725,12 @@ public class RegionStates {
     lastAssignments.put(encodedName, serverName);
   }
 
+  synchronized boolean isRegionOnServer(
+      final HRegionInfo hri, final ServerName serverName) {
+    Set<HRegionInfo> regions = serverHoldings.get(serverName);
+    return regions == null ? false : regions.contains(hri);
+  }
+
   void splitRegion(HRegionInfo p,
       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
     regionStateStore.splitRegion(p, a, b, sn);

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 9390eba..60fb2bb 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -724,9 +724,8 @@ public class ServerManager {
   throws IOException {
     AdminService.BlockingInterface admin = getRsAdmin(server);
     if (admin == null) {
-      LOG.warn("Attempting to send OPEN RPC to server " + server.toString() +
+      throw new IOException("Attempting to send OPEN RPC to server " + 
server.toString() +
         " failed because no RPC connection found to this server");
-      return RegionOpeningState.FAILED_OPENING;
     }
     OpenRegionRequest request = RequestConverter.buildOpenRegionRequest(server,
       region, favoredNodes,
@@ -753,9 +752,8 @@ public class ServerManager {
   throws IOException {
     AdminService.BlockingInterface admin = getRsAdmin(server);
     if (admin == null) {
-      LOG.warn("Attempting to send OPEN RPC to server " + server.toString() +
+      throw new IOException("Attempting to send OPEN RPC to server " + 
server.toString() +
         " failed because no RPC connection found to this server");
-      return null;
     }
 
     OpenRegionRequest request = 
RequestConverter.buildOpenRegionRequest(regionOpenInfos,

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index 7898edc..cae6142 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
               }
               toAssignRegions.add(hri);
             } else if (rit != null) {
-              if ((rit.isPendingCloseOrClosing() || rit.isOffline())
+              if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline())
                   && am.getTableStateManager().isTableState(hri.getTable(),
                   ZooKeeperProtos.Table.State.DISABLED, 
ZooKeeperProtos.Table.State.DISABLING) ||
                   am.getReplicasToClose().contains(hri)) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index c5cdacf..b60befd 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -2480,10 +2480,9 @@ public class HRegionServer extends HasThread implements
    * @param abort True if we are aborting
    * @return True if closed a region.
    * @throws NotServingRegionException if the region is not online
-   * @throws RegionAlreadyInTransitionException if the region is already 
closing
    */
   protected boolean closeRegion(String encodedName, final boolean abort, final 
ServerName sn)
-      throws NotServingRegionException, RegionAlreadyInTransitionException {
+      throws NotServingRegionException {
     //Check for permissions to close.
     HRegion actualRegion = this.getFromOnlineRegions(encodedName);
     if ((actualRegion != null) && (actualRegion.getCoprocessorHost() != null)) 
{
@@ -2518,15 +2517,8 @@ public class HRegionServer extends HasThread implements
       }
     } else if (Boolean.FALSE.equals(previous)) {
       LOG.info("Received CLOSE for the region: " + encodedName +
-        " ,which we are already trying to CLOSE, but not completed yet");
-      // The master will retry till the region is closed. We need to do this 
since
-      // the region could fail to close somehow. If we mark the region closed 
in master
-      // while it is not, there could be data loss.
-      // If the region stuck in closing for a while, and master runs out of 
retries,
-      // master will move the region to failed_to_close. Later on, if the 
region
-      // is indeed closed, master can properly re-assign it.
-      throw new RegionAlreadyInTransitionException("The region " + encodedName 
+
-        " was already closing. New CLOSE request is ignored.");
+        ", which we are already trying to CLOSE, but not completed yet");
+      return true;
     }
 
     if (actualRegion == null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 22e4d88..3e15a2e 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -51,7 +51,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
@@ -1180,7 +1179,6 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
    * @throws ServiceException
    */
   @Override
-  @SuppressWarnings("deprecation")
   @QosPriority(priority=HConstants.HIGH_QOS)
   public OpenRegionResponse openRegion(final RpcController controller,
       final OpenRegionRequest request) throws ServiceException {
@@ -1236,35 +1234,15 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
       final HRegionInfo region = 
HRegionInfo.convert(regionOpenInfo.getRegion());
       HTableDescriptor htd;
       try {
-        final HRegion onlineRegion = 
regionServer.getFromOnlineRegions(region.getEncodedName());
+        String encodedName = region.getEncodedName();
+        byte[] encodedNameBytes = region.getEncodedNameAsBytes();
+        final HRegion onlineRegion = 
regionServer.getFromOnlineRegions(encodedName);
         if (onlineRegion != null) {
-          //Check if the region can actually be opened.
-          if (onlineRegion.getCoprocessorHost() != null) {
-            onlineRegion.getCoprocessorHost().preOpen();
-          }
-          // See HBASE-5094. Cross check with hbase:meta if still this RS is 
owning
-          // the region.
-          Pair<HRegionInfo, ServerName> p = MetaTableAccessor.getRegion(
-            regionServer.getShortCircuitConnection(), region.getRegionName());
-          if (regionServer.serverName.equals(p.getSecond())) {
-            Boolean closing = 
regionServer.regionsInTransitionInRS.get(region.getEncodedNameAsBytes());
-            // Map regionsInTransitionInRSOnly has an entry for a region only 
if the region
-            // is in transition on this RS, so here closing can be null. If 
not null, it can
-            // be true or false. True means the region is opening on this RS; 
while false
-            // means the region is closing. Only return ALREADY_OPENED if not 
closing (i.e.
-            // not in transition any more, or still transition to open.
-            if (!Boolean.FALSE.equals(closing)
-                && regionServer.getFromOnlineRegions(region.getEncodedName()) 
!= null) {
-              LOG.warn("Attempted open of " + region.getEncodedName()
-                + " but already online on this server");
-              builder.addOpeningState(RegionOpeningState.ALREADY_OPENED);
-              continue;
-            }
-          } else {
-            LOG.warn("The region " + region.getEncodedName() + " is online on 
this server"
-              + " but hbase:meta does not have this server - continue 
opening.");
-            regionServer.removeFromOnlineRegions(onlineRegion, null);
-          }
+          // The region is already online. This should not happen any more.
+          String error = "Received OPEN for the region:"
+            + region.getRegionNameAsString() + ", which is already online";
+          regionServer.abort(error);
+          throw new IOException(error);
         }
         LOG.info("Open " + region.getRegionNameAsString());
         htd = htds.get(region.getTable());
@@ -1274,18 +1252,23 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
         }
 
         final Boolean previous = 
regionServer.regionsInTransitionInRS.putIfAbsent(
-          region.getEncodedNameAsBytes(), Boolean.TRUE);
+          encodedNameBytes, Boolean.TRUE);
 
         if (Boolean.FALSE.equals(previous)) {
-          // There is a close in progress. This should not happen any more.
-          throw new RegionAlreadyInTransitionException("Received OPEN for the 
region:"
-            + region.getRegionNameAsString() + " , which we are already trying 
to CLOSE ");
+          if (regionServer.getFromOnlineRegions(encodedName) != null) {
+            // There is a close in progress. This should not happen any more.
+            String error = "Received OPEN for the region:"
+              + region.getRegionNameAsString() + ", which we are already 
trying to CLOSE";
+            regionServer.abort(error);
+            throw new IOException(error);
+          }
+          regionServer.regionsInTransitionInRS.put(encodedNameBytes, 
Boolean.TRUE);
         }
 
         if (Boolean.TRUE.equals(previous)) {
           // An open is in progress. This is supported, but let's log this.
           LOG.info("Receiving OPEN for the region:" +
-            region.getRegionNameAsString() + " , which we are already trying 
to OPEN"
+            region.getRegionNameAsString() + ", which we are already trying to 
OPEN"
               + " - ignoring this new request for this region.");
         }
 
@@ -1293,7 +1276,7 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
         // want to keep returning the stale moved record while we are 
opening/if we close again.
         regionServer.removeFromMovedRegions(region.getEncodedName());
 
-        if (previous == null) {
+        if (previous == null || !previous.booleanValue()) {
           // check if the region to be opened is marked in recovering state in 
ZK
           if 
(ZKSplitLog.isRegionMarkedRecoveringInZK(regionServer.getZooKeeper(),
               region.getEncodedName())) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
index 77d8167..1c886d8 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
@@ -122,7 +122,7 @@ public class CloseRegionHandler extends EventHandler {
       LOG.debug("Closed " + region.getRegionNameAsString());
     } finally {
       this.rsServices.getRegionsInTransitionInRS().
-          remove(this.regionInfo.getEncodedNameAsBytes());
+        remove(this.regionInfo.getEncodedNameAsBytes(), Boolean.FALSE);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
index 4b5556e..c001468 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
@@ -191,12 +191,10 @@ public class TestAssignmentManagerOnCluster {
 
       // Region is assigned now. Let's assign it again.
       // Master should not abort, and region should be assigned.
-      RegionState oldState = regionStates.getRegionState(hri);
       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
       master.getAssignmentManager().waitForAssignment(hri);
       RegionState newState = regionStates.getRegionState(hri);
-      assertTrue(newState.isOpened()
-        && newState.getStamp() != oldState.getStamp());
+      assertTrue(newState.isOpened());
     } finally {
       TEST_UTIL.deleteTable(Bytes.toBytes(table));
     }
@@ -231,7 +229,7 @@ public class TestAssignmentManagerOnCluster {
       // Use the first server as the destination server
       ServerName destServer = onlineServers.iterator().next();
 
-      // Created faked dead server
+      // Created faked dead server that is still online in master
       deadServer = ServerName.valueOf(destServer.getHostname(),
           destServer.getPort(), destServer.getStartcode() - 100L);
       master.serverManager.recordNewServerWithLock(deadServer, 
ServerLoad.EMPTY_SERVERLOAD);
@@ -415,14 +413,11 @@ public class TestAssignmentManagerOnCluster {
   }
 
   /**
-   * This test should not be flaky. If it is flaky, it means something
-   * wrong with AssignmentManager which should be reported and fixed
-   *
-   * This tests forcefully assign a region while it's closing and re-assigned.
+   * This tests assign a region while it's closing.
    */
   @Test (timeout=60000)
-  public void testForceAssignWhileClosing() throws Exception {
-    String table = "testForceAssignWhileClosing";
+  public void testAssignWhileClosing() throws Exception {
+    String table = "testAssignWhileClosing";
     try {
       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
       desc.addFamily(new HColumnDescriptor(FAMILY));
@@ -664,14 +659,6 @@ public class TestAssignmentManagerOnCluster {
 
       MyRegionObserver.postCloseEnabled.set(true);
       am.unassign(hri);
-      // Now region should pending_close or closing
-      // Unassign it again so that we can trigger already
-      // in transition exception. This test is to make sure this scenario
-      // is handled properly.
-      am.server.getConfiguration().setLong(
-        AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
-      am.getRegionStates().updateRegionState(hri, 
RegionState.State.FAILED_CLOSE);
-      am.unassign(hri);
 
       // Let region closing move ahead. The region should be closed
       // properly and re-assigned automatically
@@ -727,7 +714,7 @@ public class TestAssignmentManagerOnCluster {
       am.unassign(hri);
       RegionState state = am.getRegionStates().getRegionState(hri);
       ServerName oldServerName = state.getServerName();
-      assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
+      assertTrue(state.isOpening() && oldServerName != null);
 
       // Now the region is stuck in opening
       // Let's forcefully re-assign it to trigger closing/opening
@@ -816,6 +803,7 @@ public class TestAssignmentManagerOnCluster {
 
       // You can't unassign a dead region before SSH either
       am.unassign(hri);
+      state = regionStates.getRegionState(hri);
       assertTrue(state.isFailedClose());
 
       // Enable SSH so that log can be split

http://git-wip-us.apache.org/repos/asf/hbase/blob/3a82cf23/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
index ed5dfbf..e494819 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
@@ -26,7 +26,6 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MediumTests;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.HTable;
@@ -247,29 +246,6 @@ public class TestRegionServerNoMaster {
     checkRegionIsOpened(HTU, getRS(), hri);
   }
 
-  @Test
-  public void testOpenClosingRegion() throws Exception {
-    Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
-
-    try {
-      // we re-opened meta so some of its data is lost
-      ServerName sn = getRS().getServerName();
-      
MetaTableAccessor.updateRegionLocation(getRS().getShortCircuitConnection(),
-        hri, sn, getRS().getRegion(regionName).getOpenSeqNum());
-      // fake region to be closing now, need to clear state afterwards
-      getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), 
Boolean.FALSE);
-      AdminProtos.OpenRegionRequest orr =
-        RequestConverter.buildOpenRegionRequest(sn, hri, null, null);
-      getRS().rpcServices.openRegion(null, orr);
-      Assert.fail("The closing region should not be opened");
-    } catch (ServiceException se) {
-      Assert.assertTrue("The region should be already in transition",
-        se.getCause() instanceof RegionAlreadyInTransitionException);
-    } finally {
-      getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
-    }
-  }
-
   @Test(timeout = 60000)
   public void testMultipleCloseFromMaster() throws Exception {
     for (int i = 0; i < 10; i++) {
@@ -277,11 +253,10 @@ public class TestRegionServerNoMaster {
           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), 
regionName, null);
       try {
         AdminProtos.CloseRegionResponse responseClose = 
getRS().rpcServices.closeRegion(null, crr);
-        Assert.assertEquals("The first request should succeeds", 0, i);
         Assert.assertTrue("request " + i + " failed",
             responseClose.getClosed() || responseClose.hasClosed());
       } catch (ServiceException se) {
-        Assert.assertTrue("The next queries should throw an exception.", i > 
0);
+        Assert.assertTrue("The next queries may throw an exception.", i > 0);
       }
     }
 

Reply via email to