Repository: hbase Updated Branches: refs/heads/branch-2.0 fef4a5913 -> 97578babc
HBASE-21344 hbase:meta location in ZooKeeper set to OPENING by the procedure which eventually failed but precludes Master from assigning it forever Signed-off-by: Michael Stack <st...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/97578bab Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/97578bab Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/97578bab Branch: refs/heads/branch-2.0 Commit: 97578babc652dbd6c2cc9ba608315a31a15394c9 Parents: fef4a59 Author: Ankit Singhal <ankitsingha...@gmail.com> Authored: Wed Oct 24 17:29:40 2018 -0700 Committer: Michael Stack <st...@apache.org> Committed: Wed Oct 24 20:58:05 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/master/HMaster.java | 1 - .../hadoop/hbase/HBaseTestingUtility.java | 12 +++-- .../hbase/master/TestMetaShutdownHandler.java | 52 +++++++++++++++++++- 3 files changed, 58 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/97578bab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 5e10964..af0e189 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -926,7 +926,6 @@ public class HMaster extends HRegionServer implements MasterServices { if (initMetaProc != null) { initMetaProc.await(); } - tableStateManager.start(); // Wake up this server to check in sleeper.skipSleepCycle(); http://git-wip-us.apache.org/repos/asf/hbase/blob/97578bab/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 574db2f..4c8b641 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -2669,12 +2669,14 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { } } - private void decrementMinRegionServerCount(Configuration conf) { - int currentCount = conf.getInt( - ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1); + /* + * Doesn't need to be called when using expireRegionServerSession as it will automatically + * decrement the min count + */ + public void decrementMinRegionServerCount(Configuration conf) { + int currentCount = conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1); if (currentCount != -1) { - conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, - Math.max(currentCount - 1, 1)); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, Math.max(currentCount - 1, 1)); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/97578bab/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java index 7faed1c..5c452ff 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -60,7 +61,7 @@ public class TestMetaShutdownHandler { @BeforeClass public static void setUpBeforeClass() throws Exception { - TEST_UTIL.startMiniCluster(1, 3, null, null, MyRegionServer.class); + TEST_UTIL.startMiniCluster(2, 3, null, null, MyRegionServer.class); } @AfterClass @@ -130,6 +131,55 @@ public class TestMetaShutdownHandler { metaState.getServerName(), metaServerName); } + /** + * Master should be able to recover from any unexpected state of meta-region-server znode + */ + @Test + public void testMetaAssignmentFailure() throws Exception { + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + HMaster master = cluster.getMaster(); + RegionStates regionStates = master.getAssignmentManager().getRegionStates(); + ServerName metaServerName = + regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO); + if (master.getServerName().equals(metaServerName) || metaServerName == null + || !metaServerName.equals(cluster.getServerHoldingMeta())) { + metaServerName = + cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName(); + master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), + Bytes.toBytes(metaServerName.getServerName())); + TEST_UTIL.waitUntilNoRegionsInTransition(60000); + metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO); + } + RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); + assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState()); + assertNotEquals("Meta is on master!", metaServerName, master.getServerName()); + // Setting meta state to incorrect state OPENING, to see if master restarts or standby node can + // recover it + MetaTableLocator.setMetaLocation(master.getZooKeeper(), metaServerName, + RegionState.State.OPENING); + master.abort("Abort to test whether standby assign the meta OPENING region"); + AssignmentTestingUtil.killRs(TEST_UTIL, metaServerName); + final HMaster oldMaster = master; + TEST_UTIL.decrementMinRegionServerCount(conf); + TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { + @Override + public boolean evaluate() throws Exception { + // test that standby master should be able to recover meta + return cluster.getMaster() != null && cluster.getMaster().isInitialized() + && oldMaster != cluster.getMaster(); + } + }); + master = cluster.getMaster(); + // Now, make sure meta is assigned + assertTrue("Meta should be assigned", master.getAssignmentManager().getRegionStates() + .isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); + // Now, make sure meta is registered in zk as well + metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); + assertEquals("Meta should not be in transition", RegionState.State.OPEN, metaState.getState()); + assertEquals("Meta should be assigned", metaState.getServerName(), master.getAssignmentManager() + .getRegionStates().getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO)); + } + public static class MyRegionServer extends MiniHBaseClusterRegionServer { public MyRegionServer(Configuration conf) throws IOException, KeeperException,