Repository: hbase Updated Branches: refs/heads/0.98 42d0b4c56 -> bf27f3311
HBASE-12139 StochasticLoadBalancer doesn't work on large lightly loaded clusters Summary: Currently the move cost overshadows the skew cost on a large cluster. This can render the split policy worse than useless and it can trap meta on the most loaded server in the cluster. Test Plan: Unit tests everywhere likeaboss Reviewers: stack, manukranthk Subscribers: tedyu Differential Revision: https://reviews.facebook.net/D24285 Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/bf27f331 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/bf27f331 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/bf27f331 Branch: refs/heads/0.98 Commit: bf27f33119a8fd9b6d502d60eefde12bedd4a113 Parents: 42d0b4c Author: Elliott Clark <[email protected]> Authored: Wed Oct 1 00:12:23 2014 -0700 Committer: Elliott Clark <[email protected]> Committed: Wed Oct 1 13:01:53 2014 -0700 ---------------------------------------------------------------------- .arcconfig | 2 +- .../master/balancer/StochasticLoadBalancer.java | 24 +++++++++++--- .../hbase/master/balancer/BalancerTestBase.java | 18 +++++----- .../balancer/TestStochasticLoadBalancer.java | 35 ++++++++++++++++---- 4 files changed, 59 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/bf27f331/.arcconfig ---------------------------------------------------------------------- diff --git a/.arcconfig b/.arcconfig index 9cdb37d..8eef934 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,5 +1,5 @@ { - "project_id" : "hbase", + "project_id" : "HBaseOnGithub", "conduit_uri" : "https://reviews.facebook.net/", "copyright_holder" : "Apache Software Foundation", "max_line_length" : 100 http://git-wip-us.apache.org/repos/asf/hbase/blob/bf27f331/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 69a7bba..debbd9d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -597,14 +597,28 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // Compute max as if all region servers had 0 and one had the sum of all costs. This must be // a zero sum cost for this to make sense. - // TODO: Should we make this sum of square errors? double max = ((count - 1) * mean) + (total - mean); - for (double n : stats) { + + // It's possible that there aren't enough regions to go around + double min; + if (count > total) { + min = ((count - total) * mean) + ((1 - mean) * total); + } else { + // Some will have 1 more than everything else. + int numHigh = (int) (total - (Math.floor(mean) * count)); + int numLow = (int) (count - numHigh); + + min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean))); + + } + min = Math.max(0, min); + for (int i=0; i<stats.length; i++) { + double n = stats[i]; double diff = Math.abs(mean - n); totalCost += diff; } - double scaled = scale(0, max, totalCost); + double scaled = scale(min, max, totalCost); return scaled; } @@ -630,8 +644,9 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { if (max == 0 || value == 0) { return 0; } + if ((max - min) <= 0) return 0; - return Math.max(0d, Math.min(1d, (value - min) / max)); + return Math.max(0d, Math.min(1d, (value - min) / (max - min))); } } @@ -710,6 +725,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i =0; i < cluster.numServers; i++) { stats[i] = cluster.regionsPerServer[i].length; } + return costFromArray(stats); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/bf27f331/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java index 310ae90..d32168a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java @@ -130,20 +130,22 @@ public class BalancerTestBase { List<RegionPlan> plans, Map<ServerName, List<HRegionInfo>> servers) { List<ServerAndLoad> result = new ArrayList<ServerAndLoad>(list.size()); - if (plans == null) return result; + Map<ServerName, ServerAndLoad> map = new HashMap<ServerName, ServerAndLoad>(list.size()); for (ServerAndLoad sl : list) { map.put(sl.getServerName(), sl); } - for (RegionPlan plan : plans) { - ServerName source = plan.getSource(); + if (plans != null) { + for (RegionPlan plan : plans) { + ServerName source = plan.getSource(); - updateLoad(map, source, -1); - ServerName destination = plan.getDestination(); - updateLoad(map, destination, +1); + updateLoad(map, source, -1); + ServerName destination = plan.getDestination(); + updateLoad(map, destination, +1); - servers.get(source).remove(plan.getRegionInfo()); - servers.get(destination).add(plan.getRegionInfo()); + servers.get(source).remove(plan.getRegionInfo()); + servers.get(destination).add(plan.getRegionInfo()); + } } result.clear(); result.addAll(map.values()); http://git-wip-us.apache.org/repos/asf/hbase/blob/bf27f331/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 162a257..cbd1fff 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -57,10 +57,26 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public static void beforeAllTests() throws Exception { Configuration conf = HBaseConfiguration.create(); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); + conf.setFloat("hbase.regions.slop", 0.0f); loadBalancer = new StochasticLoadBalancer(); loadBalancer.setConf(conf); } + int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 56 }; + // int[testnum][servernumber] -> numregions int[][] clusterStateMocks = new int[][]{ // 1 node @@ -78,7 +94,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{0, 1}, new int[]{10, 1}, new int[]{514, 1432}, - new int[]{47, 53}, + new int[]{48, 53}, // 3 node new int[]{0, 1, 2}, new int[]{1, 2, 3}, @@ -115,7 +131,9 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{10, 7, 12, 8, 11, 10, 9, 14}, new int[]{13, 14, 6, 10, 10, 10, 8, 10}, new int[]{130, 14, 60, 10, 100, 10, 80, 10}, - new int[]{130, 140, 60, 100, 100, 100, 80, 100} + new int[]{130, 140, 60, 100, 100, 100, 80, 100}, + largeCluster, + }; @Test @@ -191,18 +209,21 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { assertTrue(cost >= 0); assertTrue(cost <= 1.01); } - assertEquals(1, + + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 0, 0, 1})), 0.01); - assertEquals(.75, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 0, 1, 1})), 0.01); - assertEquals(.5, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 1, 1, 1})), 0.01); - assertEquals(.25, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 1, 1, 1, 1})), 0.01); assertEquals(0, costFunction.cost(mockCluster(new int[]{1, 1, 1, 1, 1})), 0.01); assertEquals(0, costFunction.cost(mockCluster(new int[]{10, 10, 10, 10, 10})), 0.01); + assertEquals(1, + costFunction.cost(mockCluster(new int[]{10000, 0, 0, 0, 0})), 0.01); } @Test @@ -234,7 +255,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { for (int i =0; i < 100; i++) { statTwo[i] = 0; } - statTwo[100] = 100; + statTwo[100] = 101; assertEquals(1, costFunction.costFromArray(statTwo), 0.01); double[] statThree = new double[200];
