This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 9eb3706 HBASE-26309 Balancer tends to move regions to the server at
the end of list (#3811)
9eb3706 is described below
commit 9eb370602158da5839f4b7b3655f4f51d997f612
Author: clarax <[email protected]>
AuthorDate: Tue Nov 2 06:58:50 2021 -0700
HBASE-26309 Balancer tends to move regions to the server at the end of list
(#3811)
Signed-off-by: Duo Zhang <[email protected]>
---
.../master/balancer/BalancerClusterState.java | 8 +++-
.../master/balancer/LoadCandidateGenerator.java | 51 +++++++++++++++++-----
.../master/balancer/StochasticLoadBalancer.java | 5 +--
.../hbase/master/balancer/BalancerTestBase.java | 38 +++++++++++++++-
...ochasticLoadBalancerRegionReplicaWithRacks.java | 32 +++++++++++++-
5 files changed, 115 insertions(+), 19 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 6ffb550..b49deb8 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -187,7 +187,7 @@ class BalancerClusterState {
int rackIndex = racksToIndex.get(rack);
serversPerRackList.get(rackIndex).add(serverIndex);
}
-
+ LOG.debug("Hosts are {} racks are {}", hostsToIndex, racksToIndex);
// Count how many regions there are.
for (Map.Entry<ServerName, List<RegionInfo>> entry :
clusterState.entrySet()) {
numRegions += entry.getValue().size();
@@ -286,6 +286,7 @@ class BalancerClusterState {
serversPerHost[i] = new int[serversPerHostList.get(i).size()];
for (int j = 0; j < serversPerHost[i].length; j++) {
serversPerHost[i][j] = serversPerHostList.get(i).get(j);
+ LOG.debug("server {} is on host {}",serversPerHostList.get(i).get(j),
i);
}
if (serversPerHost[i].length > 1) {
multiServersPerHost = true;
@@ -296,6 +297,7 @@ class BalancerClusterState {
serversPerRack[i] = new int[serversPerRackList.get(i).size()];
for (int j = 0; j < serversPerRack[i].length; j++) {
serversPerRack[i][j] = serversPerRackList.get(i).get(j);
+ LOG.info("server {} is on rack {}",serversPerRackList.get(i).get(j),
i);
}
}
@@ -787,6 +789,10 @@ class BalancerClusterState {
return regionsPerServer[server].length;
}
+ public Comparator<Integer> getNumRegionsComparator() {
+ return numRegionsComparator;
+ }
+
boolean contains(int[] arr, int val) {
return Arrays.binarySearch(arr, val) >= 0;
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java
index 595e185..edaf082 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.master.balancer;
+import java.util.concurrent.ThreadLocalRandom;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
@@ -34,27 +35,53 @@ class LoadCandidateGenerator extends CandidateGenerator {
private int pickLeastLoadedServer(final BalancerClusterState cluster, int
thisServer) {
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
- int index = 0;
- while (servers[index] == null || servers[index] == thisServer) {
- index++;
- if (index == servers.length) {
- return -1;
+ int selectedIndex = -1;
+ double currentLargestRandom = -1;
+ for (int i = 0; i < servers.length; i++) {
+ if (servers[i] == null || servers[i] == thisServer) {
+ continue;
+ }
+ if (selectedIndex != -1
+ && cluster.getNumRegionsComparator().compare(servers[i],
servers[selectedIndex]) != 0) {
+ // Exhausted servers of the same region count
+ break;
+ }
+ // we don't know how many servers have the same region count, we will
randomly select one
+ // using a simplified inline reservoir sampling by assigning a random
number to stream
+ // data and choose the greatest one.
(http://gregable.com/2007/10/reservoir-sampling.html)
+ double currentRandom = ThreadLocalRandom.current().nextDouble();
+ if (currentRandom > currentLargestRandom) {
+ selectedIndex = i;
+ currentLargestRandom = currentRandom;
}
}
- return servers[index];
+ return selectedIndex == -1 ? -1 : servers[selectedIndex];
}
private int pickMostLoadedServer(final BalancerClusterState cluster, int
thisServer) {
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
- int index = servers.length - 1;
- while (servers[index] == null || servers[index] == thisServer) {
- index--;
- if (index < 0) {
- return -1;
+ int selectedIndex = -1;
+ double currentLargestRandom = -1;
+ for (int i = servers.length - 1; i >= 0; i--) {
+ if (servers[i] == null || servers[i] == thisServer) {
+ continue;
+ }
+ if (selectedIndex != -1 &&
cluster.getNumRegionsComparator().compare(servers[i],
+ servers[selectedIndex]) != 0) {
+ // Exhausted servers of the same region count
+ break;
+ }
+ // we don't know how many servers have the same region count, we will
randomly select one
+ // using a simplified inline reservoir sampling by assigning a random
number to stream
+ // data and choose the greatest one.
(http://gregable.com/2007/10/reservoir-sampling.html)
+ double currentRandom = ThreadLocalRandom.current().nextDouble();
+ if (currentRandom > currentLargestRandom) {
+ selectedIndex = i;
+ currentLargestRandom = currentRandom;
}
}
- return servers[index];
+ return selectedIndex == -1? -1 : servers[selectedIndex];
}
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index f2993de..cb82673 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -545,10 +545,9 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
LOG.info("Finished computing new moving plan. Computation took {} ms" +
" to try {} different iterations. Found a solution that moves " +
"{} regions; Going from a computed imbalance of {}" +
- " to a new imbalance of {}. ",
+ " to a new imbalance of {}. funtionCost={}",
endTime - startTime, step, plans.size(),
- initCost / sumMultiplier, currentCost / sumMultiplier);
-
+ initCost / sumMultiplier, currentCost / sumMultiplier, functionCost());
sendRegionPlansToRingBuffer(plans, currentCost, initCost,
initFunctionTotalCosts, step);
return plans;
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
index 603ccb3..f92e225 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java
@@ -580,7 +580,7 @@ public class BalancerTestBase {
List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
// Print out the cluster loads to make debugging easier.
- LOG.info("Mock Balance : " + printMock(balancedCluster));
+ LOG.info("Mock after Balance : " + printMock(balancedCluster));
if (assertFullyBalanced) {
assertClusterAsBalanced(balancedCluster);
@@ -596,6 +596,42 @@ public class BalancerTestBase {
}
}
+ protected void testWithClusterWithIteration(Map<ServerName,
List<RegionInfo>> serverMap,
+ RackManager rackManager, boolean assertFullyBalanced, boolean
assertFullyBalancedForReplicas) {
+ List<ServerAndLoad> list = convertToList(serverMap);
+ LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
+
+ loadBalancer.setRackManager(rackManager);
+ // Run the balancer.
+ Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable = (Map)
mockClusterServersWithTables(serverMap);
+ List<RegionPlan> plans = loadBalancer.balanceCluster(LoadOfAllTable);
+ assertNotNull("Initial cluster balance should produce plans.", plans);
+
+ List<ServerAndLoad> balancedCluster = null;
+ // Run through iteration until done. Otherwise will be killed as test time
out
+ while (plans != null && (assertFullyBalanced ||
assertFullyBalancedForReplicas)) {
+ // Apply the plan to the mock cluster.
+ balancedCluster = reconcile(list, plans, serverMap);
+
+ // Print out the cluster loads to make debugging easier.
+ LOG.info("Mock after balance: " + printMock(balancedCluster));
+
+ LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap);
+ plans = loadBalancer.balanceCluster(LoadOfAllTable);
+ }
+
+ // Print out the cluster loads to make debugging easier.
+ LOG.info("Mock Final balance: " + printMock(balancedCluster));
+
+ if (assertFullyBalanced) {
+ assertNull("Given a requirement to be fully balanced, second attempt at
plans should " +
+ "produce none.", plans);
+ }
+ if (assertFullyBalancedForReplicas) {
+ assertRegionReplicaPlacement(serverMap, rackManager);
+ }
+ }
+
protected Map<ServerName, List<RegionInfo>> createServerMap(int numNodes,
int numRegions,
int
numRegionsPerServer,
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
index ea305d3..c1948de 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -38,6 +39,8 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks
extends BalancerTe
private static class ForTestRackManager extends RackManager {
int numRacks;
+ Map<String, Integer> serverIndexes = new HashMap<String, Integer>();
+ int numServers = 0;
public ForTestRackManager(int numRacks) {
this.numRacks = numRacks;
@@ -45,13 +48,18 @@ public class
TestStochasticLoadBalancerRegionReplicaWithRacks extends BalancerTe
@Override
public String getRack(ServerName server) {
- return "rack_" + (server.hashCode() % numRacks);
+ String key = server.getServerName();
+ if (!serverIndexes.containsKey(key)) {
+ serverIndexes.put(key, numServers++);
+ }
+ return "rack_" + serverIndexes.get(key) % numRacks;
}
}
@Test
public void testRegionReplicationOnMidClusterWithRacks() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
+ conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 *
1000); // 120 sec
loadBalancer.onConfigurationChange(conf);
@@ -65,6 +73,26 @@ public class
TestStochasticLoadBalancerRegionReplicaWithRacks extends BalancerTe
createServerMap(numNodes, numRegions, numRegionsPerServer,
replication, numTables);
RackManager rm = new ForTestRackManager(numRacks);
- testWithCluster(serverMap, rm, false, true);
+ testWithClusterWithIteration(serverMap, rm, true, true);
+ }
+
+ @Test
+ public void testRegionReplicationOnLargeClusterWithRacks() {
+ conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", false);
+ conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 5000L);
+ conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
+ conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 10 *
1000); // 10 sec
+ loadBalancer.onConfigurationChange(conf);
+ int numNodes = 100;
+ int numRegions = numNodes * 30;
+ int replication = 3; // 3 replicas per region
+ int numRegionsPerServer = 28;
+ int numTables = 1;
+ int numRacks = 4; // all replicas should be on a different rack
+ Map<ServerName, List<RegionInfo>> serverMap =
+ createServerMap(numNodes, numRegions, numRegionsPerServer, replication,
numTables);
+ RackManager rm = new ForTestRackManager(numRacks);
+
+ testWithClusterWithIteration(serverMap, rm, true, true);
}
}