Repository: hbase
Updated Branches:
  refs/heads/0.98 780f6f525 -> 3b4b1de3c
  refs/heads/branch-1 908779b88 -> 4ff742742
  refs/heads/master 72a6a670a -> 4ac457a7b
HBASE-12480 Regions in FAILED_OPEN/FAILED_CLOSE should be processed on master failover Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/4ac457a7 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/4ac457a7 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/4ac457a7 Branch: refs/heads/master Commit: 4ac457a7bc909cc92e0a1a0cab21ed0ce6bae893 Parents: 72a6a67 Author: Virag Kothari <[email protected]> Authored: Tue Jan 13 11:03:16 2015 -0800 Committer: Virag Kothari <[email protected]> Committed: Tue Jan 13 11:03:16 2015 -0800 ---------------------------------------------------------------------- .../hadoop/hbase/master/AssignmentManager.java | 20 ++++++++--- .../hadoop/hbase/master/TestMasterFailover.java | 36 ++++++++++++++++++-- 2 files changed, 49 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/4ac457a7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 2f6679f..b17561a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -450,8 +450,9 @@ public class AssignmentManager { Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition(); if (!regionsInTransition.isEmpty()) { for (RegionState regionState: regionsInTransition.values()) { + ServerName serverName = regionState.getServerName(); if (!regionState.getRegion().isMetaRegion() - && onlineServers.contains(regionState.getServerName())) { + && serverName != null && onlineServers.contains(serverName)) { 
LOG.debug("Found " + regionState + " in RITs"); failover = true; break; @@ -1694,18 +1695,23 @@ public class AssignmentManager { /** * Processes list of regions in transition at startup */ - void processRegionsInTransition(Collection<RegionState> regionStates) { + void processRegionsInTransition(Collection<RegionState> regionsInTransition) { // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions // in case the RPC call is not sent out yet before the master was shut down // since we update the state before we send the RPC call. We can't update // the state after the RPC call. Otherwise, we don't know what's happened // to the region if the master dies right after the RPC call is out. - for (RegionState regionState: regionStates) { - if (!serverManager.isServerOnline(regionState.getServerName())) { + for (RegionState regionState: regionsInTransition) { + LOG.info("Processing " + regionState); + ServerName serverName = regionState.getServerName(); + // Server could be null in case of FAILED_OPEN when master cannot find a region plan. In that + // case, try assigning it here. + if (serverName != null && !serverManager.getOnlineServers().containsKey(serverName)) { + LOG.info("Server " + serverName + " isn't online. 
SSH will handle this"); continue; // SSH will handle it } + HRegionInfo regionInfo = regionState.getRegion(); RegionState.State state = regionState.getState(); - LOG.info("Processing " + regionState); switch (state) { case CLOSED: invokeAssign(regionState.getRegion()); @@ -1716,6 +1722,10 @@ public class AssignmentManager { case PENDING_CLOSE: retrySendRegionClose(regionState); break; + case FAILED_CLOSE: + case FAILED_OPEN: + invokeUnAssign(regionInfo); + break; default: // No process for other states } http://git-wip-us.apache.org/repos/asf/hbase/blob/4ac457a7/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index f211754..cae1258 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -217,7 +217,7 @@ public class TestMasterFailover { HMaster master = masterThreads.get(0).getMaster(); assertTrue(master.isActiveMaster()); assertTrue(master.isInitialized()); - + // Create a table with a region online Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family"); onlineTable.close(); @@ -260,7 +260,36 @@ public class TestMasterFailover { oldState = new RegionState(hriOffline, State.OFFLINE); newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName()); stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState); - + + HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null); + createRegion(failedClose, rootdir, conf, offlineTable); + MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose); + + oldState = new RegionState(failedClose, State.PENDING_CLOSE); + 
newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName()); + stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState); + + HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null); + createRegion(failedOpen, rootdir, conf, offlineTable); + MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen); + + // Simulate a region transitioning to failed open when the region server reports the + // transition as FAILED_OPEN + oldState = new RegionState(failedOpen, State.PENDING_OPEN); + newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName()); + stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState); + + HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null); + LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName()); + createRegion(failedOpenNullServer, rootdir, conf, offlineTable); + MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer); + + // Simulate a region transitioning to failed open when the master couldn't find a plan for + // the region + oldState = new RegionState(failedOpenNullServer, State.OFFLINE); + newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null); + stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState); + // Stop the master log("Aborting master"); cluster.abortMaster(0); @@ -283,6 +312,9 @@ public class TestMasterFailover { // Both pending_open (RPC sent/not yet) regions should be online assertTrue(regionStates.isRegionOnline(hriOffline)); assertTrue(regionStates.isRegionOnline(hriOnline)); + assertTrue(regionStates.isRegionOnline(failedClose)); + assertTrue(regionStates.isRegionOnline(failedOpenNullServer)); + assertTrue(regionStates.isRegionOnline(failedOpen)); log("Done with verification, shutting down cluster");
