Repository: hbase
Updated Branches:
  refs/heads/0.98 780f6f525 -> 3b4b1de3c
  refs/heads/branch-1 908779b88 -> 4ff742742
  refs/heads/master 72a6a670a -> 4ac457a7b


HBASE-12480 Regions in FAILED_OPEN/FAILED_CLOSE should be processed on master failover


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/4ac457a7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/4ac457a7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/4ac457a7

Branch: refs/heads/master
Commit: 4ac457a7bc909cc92e0a1a0cab21ed0ce6bae893
Parents: 72a6a67
Author: Virag Kothari <[email protected]>
Authored: Tue Jan 13 11:03:16 2015 -0800
Committer: Virag Kothari <[email protected]>
Committed: Tue Jan 13 11:03:16 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java  | 20 ++++++++---
 .../hadoop/hbase/master/TestMasterFailover.java | 36 ++++++++++++++++++--
 2 files changed, 49 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/4ac457a7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 2f6679f..b17561a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -450,8 +450,9 @@ public class AssignmentManager {
         Map<String, RegionState> regionsInTransition = 
regionStates.getRegionsInTransition();
         if (!regionsInTransition.isEmpty()) {
           for (RegionState regionState: regionsInTransition.values()) {
+            ServerName serverName = regionState.getServerName();
             if (!regionState.getRegion().isMetaRegion()
-                && onlineServers.contains(regionState.getServerName())) {
+                && serverName != null && onlineServers.contains(serverName)) {
               LOG.debug("Found " + regionState + " in RITs");
               failover = true;
               break;
@@ -1694,18 +1695,23 @@ public class AssignmentManager {
   /**
    * Processes list of regions in transition at startup
    */
-  void processRegionsInTransition(Collection<RegionState> regionStates) {
+  void processRegionsInTransition(Collection<RegionState> regionsInTransition) 
{
     // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions
     // in case the RPC call is not sent out yet before the master was shut down
     // since we update the state before we send the RPC call. We can't update
     // the state after the RPC call. Otherwise, we don't know what's happened
     // to the region if the master dies right after the RPC call is out.
-    for (RegionState regionState: regionStates) {
-      if (!serverManager.isServerOnline(regionState.getServerName())) {
+    for (RegionState regionState: regionsInTransition) {
+      LOG.info("Processing " + regionState);
+      ServerName serverName = regionState.getServerName();
+      // Server could be null in case of FAILED_OPEN when master cannot find a 
region plan. In that
+      // case, try assigning it here.
+      if (serverName != null && 
!serverManager.getOnlineServers().containsKey(serverName)) {
+        LOG.info("Server " + serverName + " isn't online. SSH will handle 
this");
         continue; // SSH will handle it
       }
+      HRegionInfo regionInfo = regionState.getRegion();
       RegionState.State state = regionState.getState();
-      LOG.info("Processing " + regionState);
       switch (state) {
       case CLOSED:
         invokeAssign(regionState.getRegion());
@@ -1716,6 +1722,10 @@ public class AssignmentManager {
       case PENDING_CLOSE:
         retrySendRegionClose(regionState);
         break;
+      case FAILED_CLOSE:
+      case FAILED_OPEN:
+        invokeUnAssign(regionInfo);
+        break;
       default:
         // No process for other states
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/4ac457a7/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index f211754..cae1258 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -217,7 +217,7 @@ public class TestMasterFailover {
     HMaster master = masterThreads.get(0).getMaster();
     assertTrue(master.isActiveMaster());
     assertTrue(master.isInitialized());
-
+    
     // Create a table with a region online
     Table onlineTable = 
TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
     onlineTable.close();
@@ -260,7 +260,36 @@ public class TestMasterFailover {
     oldState = new RegionState(hriOffline, State.OFFLINE);
     newState = new RegionState(hriOffline, State.PENDING_OPEN, 
newState.getServerName());
     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
-
+    
+    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), 
null, null);
+    createRegion(failedClose, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
+    
+    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
+    newState = new RegionState(failedClose, State.FAILED_CLOSE, 
newState.getServerName());
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), 
null, null);
+    createRegion(failedOpen, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
+    
+    // Simulate a region transitioning to failed open when the region server 
reports the
+    // transition as FAILED_OPEN
+    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
+    newState = new RegionState(failedOpen, State.FAILED_OPEN, 
newState.getServerName());
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+    HRegionInfo failedOpenNullServer = new 
HRegionInfo(offlineTable.getTableName(), null, null);
+    LOG.info("Failed open NUll server " + 
failedOpenNullServer.getEncodedName());
+    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), 
failedOpenNullServer);
+    
+    // Simulate a region transitioning to failed open when the master couldn't 
find a plan for
+    // the region
+    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
+    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
     // Stop the master
     log("Aborting master");
     cluster.abortMaster(0);
@@ -283,6 +312,9 @@ public class TestMasterFailover {
     // Both pending_open (RPC sent/not yet) regions should be online
     assertTrue(regionStates.isRegionOnline(hriOffline));
     assertTrue(regionStates.isRegionOnline(hriOnline));
+    assertTrue(regionStates.isRegionOnline(failedClose));
+    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
+    assertTrue(regionStates.isRegionOnline(failedOpen));
 
     log("Done with verification, shutting down cluster");
 

Reply via email to