rmdmattingly commented on code in PR #6593: URL: https://github.com/apache/hbase/pull/6593#discussion_r1909330627
########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) { LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}", functionCost()); return true; } + if (balancerConditionals.shouldRunBalancer(cluster)) { + LOG.info("Running balancer because conditional candidate generators have important moves"); + return true; + } + double total = 0.0; + double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point for (CostFunction c : costFunctions) { if (!c.isNeeded()) { LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } total += c.cost() * c.getMultiplier(); + multiplierTotal += c.getMultiplier(); } - boolean balanced = (total / sumMultiplier < minCostNeedBalance); + boolean balanced = (total / multiplierTotal < minCostNeedBalance); if (balanced) { final double calculatedTotal = total; sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier), costFunctions); LOG.info( "{} - skipping load balancing because weighted average imbalance={} <= " - + "threshold({}). If you want more aggressive balancing, either lower " + + "threshold({}) and conditionals do not have opinionated move candidates. " + + "consecutive balancer runs. If you want more aggressive balancing, either lower " + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative " + "multiplier(s) of the specific cost function(s). functionCost={}", isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier, minCostNeedBalance, minCostNeedBalance, functionCost()); } else { - LOG.info("{} - Calculating plan. may take up to {}ms to complete.", - isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime); + LOG.info( + "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}", + isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total, + minCostNeedBalance); } return !balanced; } @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") - BalanceAction nextAction(BalancerClusterState cluster) { - return getRandomGenerator().generate(cluster); + Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) { + CandidateGenerator generator = getRandomGenerator(cluster); + return Pair.newPair(generator, generator.generate(cluster)); Review Comment: Returning a pair here allows for better logging of generator activity, which can be nice when debugging the balancer's decision making ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -372,70 +407,108 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) { LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}", functionCost()); return true; } + if (balancerConditionals.shouldRunBalancer(cluster)) { + LOG.info("Running balancer because conditional candidate generators have important moves"); + return true; + } + double total = 0.0; + double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point for (CostFunction c : costFunctions) { if (!c.isNeeded()) { LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } total += c.cost() * c.getMultiplier(); + multiplierTotal += c.getMultiplier(); } - boolean balanced = (total / sumMultiplier < minCostNeedBalance); + boolean balanced = (total / multiplierTotal < minCostNeedBalance); if (balanced) { final double calculatedTotal = total; sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier), costFunctions); LOG.info( "{} - skipping load balancing because weighted average imbalance={} <= " - + "threshold({}). If you want more aggressive balancing, either lower " + + "threshold({}) and conditionals do not have opinionated move candidates. " + + "consecutive balancer runs. If you want more aggressive balancing, either lower " + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative " + "multiplier(s) of the specific cost function(s). functionCost={}", isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier, minCostNeedBalance, minCostNeedBalance, functionCost()); } else { - LOG.info("{} - Calculating plan. may take up to {}ms to complete.", - isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime); + LOG.info( + "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}", + isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total, + minCostNeedBalance); } return !balanced; } @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") - BalanceAction nextAction(BalancerClusterState cluster) { - return getRandomGenerator().generate(cluster); + Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) { + CandidateGenerator generator = getRandomGenerator(cluster); + return Pair.newPair(generator, generator.generate(cluster)); } /** * Select the candidate generator to use based on the cost of cost functions. The chance of * selecting a candidate generator is propotional to the share of cost of all cost functions among * all cost functions that benefit from it. */ - protected CandidateGenerator getRandomGenerator() { Review Comment: I had to rewrite this method, largely because of the refactor to a Map of generators and their weights, rather than lists. Anyway, this also fixes a bug in this method: it modifies the weightsOfGenerators array in place. That is: 1. It first computes a running sum in weightsOfGenerators[i]. 2. It then divides those values by the total to get a cumulative distribution. 3. It uses this (mutated) array to select a generator. 4. But the original weights are overwritten and never restored. As a result, this method can produce an unfair distribution of generator activity. Maybe in practice it was okay because we called `updateCostsAndWeightsWithAction` frequently enough and that implicitly resets weights — but I think in reality, it was likely that we ran into NULL_ACTIONs that manifested as no-ops frequently, and that produced skewed generator weights. Anyway, this was fragile and buggy, and it should be better and more fair now ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java: ########## @@ -122,6 +129,14 @@ class BalancerClusterState { // Maps regionName -> oldServerName -> cache ratio of the region on the old server Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap; + private Supplier<List<Integer>> shuffledServerIndicesSupplier = + Suppliers.memoizeWithExpiration(() -> { + Collection<Integer> serverIndices = serversToIndex.values(); + List<Integer> shuffledServerIndices = new ArrayList<>(serverIndices); + Collections.shuffle(shuffledServerIndices); + return shuffledServerIndices; + }, 5, TimeUnit.SECONDS); Review Comment: I often found that it was nice to let opinionated candidate generators iterate through servers in different orders on subsequent runs, so this saves some cycles across generators that may want that (replica distribution and table isolation both rely on this atm) ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java: ########## @@ -899,6 +970,48 @@ int[] addRegion(int[] regions, int regionIndex) { return newRegions; } + int[] removeRegions(int[] regions, Set<Integer> regionIndicesToRemove) { + // Calculate the size of the new regions array + int newSize = regions.length - regionIndicesToRemove.size(); + if (newSize < 0) { + throw new IllegalStateException( + "Region indices mismatch: more regions to remove than in the regions array"); + } + + int[] newRegions = new int[newSize]; + int newIndex = 0; + + // Copy only the regions not in the removal set + for (int region : regions) { + if (!regionIndicesToRemove.contains(region)) { + newRegions[newIndex++] = region; + } + } + + // If the newIndex is smaller than newSize, some regions were missing from the input array + if (newIndex != newSize) { + throw new IllegalStateException("Region indices mismatch: some regions in the removal " + + "set were not found in the regions array"); + } + + return newRegions; + } + + int[] addRegions(int[] regions, Set<Integer> regionIndicesToAdd) { + int[] newRegions = new int[regions.length + regionIndicesToAdd.size()]; + + // Copy the existing regions to the new array + System.arraycopy(regions, 0, newRegions, 0, regions.length); + + // Add the new regions at the end of the array + int newIndex = regions.length; + for (int regionIndex : regionIndicesToAdd) { + newRegions[newIndex++] = regionIndex; + } + + return newRegions; + } Review Comment: These methods make it easier to support the MoveBatch action that might remove or add several regions to a server ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java: ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public abstract class RegionPlanConditional { Review Comment: The RegionPlanConditional is a deliberately restrictive, but still powerful, view into the implications of a move. Each RegionPlanConditional should implement `isViolatingServer(RegionPlan regionPlan, Set<RegionInfo> destinationRegions)`, `isViolatingHost`, and `isViolatingRack` as necessary to ensure that RegionPlans don't conflict with its intentions for the given destination, based on the regions already on said destination. To help ensure that we always find a path to success, RegionPlanConditionals can also provide a List of specialized CandidateGenerators that will be prioritized over our OOTB generators. This ensures that, even as clusters scale, we aren't relying on randomness to _eventually_ lead us to an ideal solution. ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) { LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}", functionCost()); return true; } + if (balancerConditionals.shouldRunBalancer(cluster)) { + LOG.info("Running balancer because conditional candidate generators have important moves"); + return true; + } + double total = 0.0; + double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point for (CostFunction c : costFunctions) { if (!c.isNeeded()) { LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } total += c.cost() * c.getMultiplier(); + multiplierTotal += c.getMultiplier(); } - boolean balanced = (total / sumMultiplier < minCostNeedBalance); + boolean balanced = (total / multiplierTotal < minCostNeedBalance); Review Comment: Another bug I found is that `sumMultiplier` might not be initialized at this point. As a result, we should just build the multiplier on the fly as we iterate through cost functions anyway. This ensures that our `boolean balanced` derivation will have the necessary inputs available ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableColocationCandidateGenerator.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This will generate candidates that colocate a table on the number of RegionServers equal to its + * number of replicas. For example, this is useful when isolating system tables. + */ +@InterfaceAudience.Private +public final class TableColocationCandidateGenerator Review Comment: System table isolation is one thing, but we also want colocation of any tables that we're isolation. For example, if we have 10 system table regions that we've isolated, and they're on 10 different servers, then it feels wasteful to have 10 servers dedicated to hosting only 1 region of said system table. This generator will ensure that we smartly find our way to a plan that colocates the given system table on one server (or _n_ servers, if the table has >1 replica) ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java: ########## @@ -28,6 +28,7 @@ enum Type { ASSIGN_REGION, MOVE_REGION, SWAP_REGIONS, + MOVE_BATCH, Review Comment: Conditional candidate generators might need to make a series of moves, depending on how complex the path to success is. This new action type allows generators to string together a series of moves ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java: ########## @@ -705,7 +720,41 @@ enum LocalityType { RACK } - public void doAction(BalanceAction action) { + public List<RegionPlan> convertActionToPlans(BalanceAction action) { Review Comment: It's easier to reason about what a RegionPlan is, vs what a BalanceAction is. So the new conditionals are intentionally RegionPlanConditionals that expect to work with only RegionPlans as the unified representation for what any BalanceAction might want to do. As a result, I needed a way to transform BalanceActions to RegionPlans _without_ running the action against the BCS ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java: ########## @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.lang.reflect.Constructor; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.util.ReflectionUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; + +/** + * Balancer conditionals supplement cost functions in the {@link StochasticLoadBalancer}. Cost + * functions are insufficient and difficult to work with when making discrete decisions; this is + * because they operate on a continuous scale, and each cost function's multiplier affects the + * relative importance of every other cost function. So it is difficult to meaningfully and clearly + * value many aspects of your region distribution via cost functions alone. Conditionals allow you + * to very clearly define discrete rules that your balancer would ideally follow. To clarify, a + * conditional violation will not block a region assignment because we would prefer to have uptime + * than have perfectly intentional balance. But conditionals allow you to, for example, define that + * a region's primary and secondary should not live on the same rack. Another example, conditionals + * make it easy to define that system tables will ideally be isolated on their own RegionServer + * (without needing to manage distinct RegionServer groups). Use of conditionals may cause an + * extremely unbalanced cluster to exceed its max balancer runtime. This is necessary because + * conditional candidate generation is quite expensive, and cutting it off early could prevent us + * from finding a solution. + */ +@InterfaceAudience.Private +final class BalancerConditionals implements Configurable { Review Comment: This class is statically available via the package protected `BalancerConditionals.INSTANCE`, and it exists to be a unified interface into all conditionals that are configured in the current balancer run ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -157,14 +160,17 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private RegionReplicaHostCostFunction regionReplicaHostCostFunction; private RegionReplicaRackCostFunction regionReplicaRackCostFunction; - protected List<CandidateGenerator> candidateGenerators; + protected Map<Class<? extends CandidateGenerator>, CandidateGenerator> candidateGenerators; + private Map<Class<? extends CandidateGenerator>, Double> weightsOfGenerators; + private final Supplier<List<Class<? extends CandidateGenerator>>> shuffledGeneratorClasses = + Suppliers.memoizeWithExpiration(() -> { + List<Class<? extends CandidateGenerator>> shuffled = + new ArrayList<>(candidateGenerators.keySet()); + Collections.shuffle(shuffled); + return shuffled; + }, 5, TimeUnit.SECONDS); Review Comment: In my work here, I realized that we biased towards certain candidate generators. This reshuffling of generator order is now possible because I've removed our reliance on enum ordinals, and helps to provide a more earnestly stochastic approach when not relying on conditional candidate generators ########## hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java: ########## @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY; +import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class CandidateGeneratorTestUtil { Review Comment: The rest of the changes here, probably at least half of this PR, are tests. I've setup minicluster tests to validate that clusters startup and balance as intended with conditionals enabled, and I've setup simpler tests that validate the StochasticLoadBalancer's ability to get out of very hairy, high scale, imbalances via conditionals and their generators. I've setup tests that validate a wide variety of conditional usage, and that all conditionals implemented in this PR play nicely with one another. ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -213,16 +219,18 @@ private void loadCustomCostFunctions(Configuration conf) { @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*/src/test/.*") - List<CandidateGenerator> getCandidateGenerators() { + Map<Class<? extends CandidateGenerator>, CandidateGenerator> getCandidateGenerators() { Review Comment: As mentioned above, I moved away from this system of candidate generators ordered by the GeneratorType enum ordinal because I thought that was a fragile setup, and because it was necessary to provide more fair generator usage. This Map approach would also allow us to offer pluggable candidate generators via some `...additionalGenerators` config. I think that could be a nice way to let users write some custom balancer code, without the complexities associated with designing cost functions or building their own custom balancer. That said, I've not included this pluggable generator idea in this PR. ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -332,8 +342,33 @@ void updateMetricsSize(int size) { } private boolean areSomeRegionReplicasColocated(BalancerClusterState c) { + if (!c.hasRegionReplicas || balancerConditionals.isReplicaDistributionEnabled()) { + // This check is unnecessary without replicas, or with conditional replica distribution + // The balancer will auto-run if conditional replica distribution candidates are available + return false; + } + + // Check host regionReplicaHostCostFunction.prepare(c); - return (Math.abs(regionReplicaHostCostFunction.cost()) > CostFunction.COST_EPSILON); + double hostCost = Math.abs(regionReplicaHostCostFunction.cost()); + boolean colocatedAtHost = hostCost > CostFunction.COST_EPSILON; + if (colocatedAtHost) { + return true; + } + LOG.trace("No host colocation detected with host cost={}", hostCost); + + // Check rack + regionReplicaRackCostFunction.prepare(c); + double rackCost = Math.abs(regionReplicaRackCostFunction.cost()); + boolean colocatedAtRack = + (Math.abs(regionReplicaRackCostFunction.cost()) > CostFunction.COST_EPSILON); + if (colocatedAtRack) { + return true; + } + LOG.trace("No rack colocation detected with rack cost={}", rackCost); + + return DistributeReplicasCandidateGenerator.INSTANCE.generateCandidate(c, true) + != BalanceAction.NULL_ACTION; } Review Comment: This method was never great, because it only bothered to check colocation on hosts. It never bothered to check racks. I've fixed that here, while also making it a no-op when using conditional-based replica distribution (which makes this check unnecessary — the balancer will always run if conditional candidate generators have work to do) ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) { LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}", functionCost()); return true; } + if (balancerConditionals.shouldRunBalancer(cluster)) { + LOG.info("Running balancer because conditional candidate generators have important moves"); + return true; + } + double total = 0.0; + double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point for (CostFunction c : costFunctions) { if (!c.isNeeded()) { LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } total += c.cost() * c.getMultiplier(); + multiplierTotal += c.getMultiplier(); } - boolean balanced = (total / sumMultiplier < minCostNeedBalance); + boolean balanced = (total / multiplierTotal < minCostNeedBalance); if (balanced) { final double calculatedTotal = total; sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier), costFunctions); LOG.info( "{} - skipping load balancing because weighted average imbalance={} <= " - + "threshold({}). If you want more aggressive balancing, either lower " + + "threshold({}) and conditionals do not have opinionated move candidates. " + + "consecutive balancer runs. If you want more aggressive balancing, either lower " + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative " + "multiplier(s) of the specific cost function(s). functionCost={}", isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier, minCostNeedBalance, minCostNeedBalance, functionCost()); } else { - LOG.info("{} - Calculating plan. may take up to {}ms to complete.", - isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime); + LOG.info( + "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}", + isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total, + minCostNeedBalance); } return !balanced; } @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") - BalanceAction nextAction(BalancerClusterState cluster) { - return getRandomGenerator().generate(cluster); + Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) { + CandidateGenerator generator = getRandomGenerator(cluster); + return Pair.newPair(generator, generator.generate(cluster)); } /** * Select the candidate generator to use based on the cost of cost functions. The chance of * selecting a candidate generator is propotional to the share of cost of all cost functions among * all cost functions that benefit from it. */ - protected CandidateGenerator getRandomGenerator() { - double sum = 0; - for (int i = 0; i < weightsOfGenerators.length; i++) { - sum += weightsOfGenerators[i]; - weightsOfGenerators[i] = sum; + protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) { + // Prefer conditional generators if they have moves to make + if (balancerConditionals.isConditionalBalancingEnabled()) { + for (RegionPlanConditional conditional : balancerConditionals.getConditionals()) { + List<RegionPlanConditionalCandidateGenerator> generators = + conditional.getCandidateGenerators(); + for (RegionPlanConditionalCandidateGenerator generator : generators) { + if (generator.getWeight(cluster) > 0) { + return generator; + } + } + } + } Review Comment: We will always prefer conditional generator moves if they exist ########## hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java: ########## @@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return true; } - if (sloppyRegionServerExist(cs)) { + if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) { Review Comment: Because conditional balancing may deliberately produce sloppy servers that we have no intention of, or path for, fixing, I've opted to disable this trigger when conditional balancing is enabled. I think this is an appropriate tradeoff -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org