HBASE-19021 Restore a few important missing logics for balancer in 2.0 Signed-off-by: Jerry He <jerry...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9716f62f Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9716f62f Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9716f62f Branch: refs/heads/HBASE-18410 Commit: 9716f62f43195ef024ac7a4bafb93a4716a7323e Parents: 1c1906e Author: Jerry He <jerry...@apache.org> Authored: Tue Oct 24 07:53:17 2017 -0700 Committer: Jerry He <jerry...@apache.org> Committed: Tue Oct 24 07:53:17 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/master/HMaster.java | 9 +++++--- .../hbase/master/assignment/RegionStates.java | 8 +++++++ .../master/procedure/ServerCrashProcedure.java | 1 + .../hadoop/hbase/TestRegionRebalancing.java | 24 ++++++++++++++++---- 4 files changed, 35 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/9716f62f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 8f2ae6b..bb36520 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1421,16 +1421,19 @@ public class HMaster extends HRegionServer implements MasterServices { } } + boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false); Map<TableName, Map<ServerName, List<RegionInfo>>> assignmentsByTable = - this.assignmentManager.getRegionStates().getAssignmentsByTable(); + this.assignmentManager.getRegionStates().getAssignmentsByTable(!isByTable); List<RegionPlan> plans = new ArrayList<>(); //Give the balancer the current cluster state. this.balancer.setClusterStatus(getClusterStatus()); - this.balancer.setClusterLoad( - this.assignmentManager.getRegionStates().getAssignmentsByTable()); + this.balancer.setClusterLoad(assignmentsByTable); + for (Map<ServerName, List<RegionInfo>> serverMap : assignmentsByTable.values()) { + serverMap.keySet().removeAll(this.serverManager.getDrainingServersList()); + } for (Entry<TableName, Map<ServerName, List<RegionInfo>>> e : assignmentsByTable.entrySet()) { List<RegionPlan> partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue()); if (partialPlans != null) plans.addAll(partialPlans); http://git-wip-us.apache.org/repos/asf/hbase/blob/9716f62f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index c13a49d..3b58fe2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -756,6 +756,14 @@ public class RegionStates { serverResult.add(node.getRegionInfo()); } + // Add online servers with no assignment for the table. + for (Map<ServerName, List<RegionInfo>> table: result.values()) { + for (ServerName svr : serverMap.keySet()) { + if (!table.containsKey(svr)) { + table.put(svr, new ArrayList<RegionInfo>()); + } + } + } return result; } http://git-wip-us.apache.org/repos/asf/hbase/blob/9716f62f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index a0ee628..56efaeb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -172,6 +172,7 @@ implements ServerProcedureInterface { break; case SERVER_CRASH_FINISH: + services.getAssignmentManager().getRegionStates().removeServer(serverName); services.getServerManager().getDeadServers().finish(serverName); return Flow.NO_MORE_STATE; http://git-wip-us.apache.org/repos/asf/hbase/blob/9716f62f/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index 3f7ea3b..cb9f768 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -53,9 +54,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; /** * Test whether region re-balancing works. (HBASE-71) + * The test only works for cluster wide balancing, not per table wide. + * Increase the margin a little to make StochasticLoadBalancer result acceptable. */ -@Ignore // This is broken since new RegionServers does proper average of regions -// and because Master is treated as a regionserver though it hosts two regions only. @Category({FlakeyTests.class, LargeTests.class}) @RunWith(value = Parameterized.class) public class TestRegionRebalancing { @@ -131,6 +132,7 @@ public class TestRegionRebalancing { // add a region server - total of 3 LOG.info("Started third server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); @@ -147,12 +149,14 @@ public class TestRegionRebalancing { LOG.info("Added fourth server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); waitOnCrashProcessing(); + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); for (int i = 0; i < 6; i++){ LOG.info("Adding " + (i + 5) + "th region server"); UTIL.getHBaseCluster().startRegionServer(); } + waitForAllRegionsAssigned(); assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); regionLocator.close(); @@ -188,9 +192,14 @@ public class TestRegionRebalancing { long regionCount = UTIL.getMiniHBaseCluster().countServedRegions(); List<HRegionServer> servers = getOnlineRegionServers(); - double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad(); + double avg = (double)regionCount / (double)servers.size(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop)); int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1; + // Increase the margin a little to accommodate StochasticLoadBalancer + if (this.balancerName.contains("StochasticLoadBalancer")) { + avgLoadPlusSlop++; + avgLoadMinusSlop--; + } LOG.debug("There are " + servers.size() + " servers and " + regionCount + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop + ", up border: " + avgLoadPlusSlop + "; attempt: " + i); @@ -250,13 +259,20 @@ public class TestRegionRebalancing { */ private void waitForAllRegionsAssigned() throws IOException { int totalRegions = HBaseTestingUtility.KEYS.length; + try { + Thread.sleep(200); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) { // while (!cluster.getMaster().allRegionsAssigned()) { LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + UTIL.getMiniHBaseCluster().countServedRegions() + " right now."); try { Thread.sleep(200); - } catch (InterruptedException e) {} + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } } UTIL.waitUntilNoRegionsInTransition(); }