busbey commented on a change in pull request #3356:
URL: https://github.com/apache/hbase/pull/3356#discussion_r657441767
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -319,38 +318,33 @@ boolean needsBalance(TableName tableName,
BalancerClusterState cluster) {
return true;
}
+ sumMultiplier = 0.0f;
double total = 0.0;
- float sumMultiplier = 0.0f;
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
- if (multiplier <= 0) {
- LOG.trace("{} not needed because multiplier is <= 0",
c.getClass().getSimpleName());
- continue;
- }
+ double cost = c.cost();
if (!c.isNeeded()) {
LOG.trace("{} not needed", c.getClass().getSimpleName());
continue;
}
+ total += cost * multiplier;
sumMultiplier += multiplier;
- total += c.cost() * multiplier;
}
- boolean balanced = total <= 0 || sumMultiplier <= 0 ||
- (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
+ boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+
Review comment:
Do we have a guard somewhere against all the multipliers being 0?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -285,14 +286,12 @@ private boolean
areSomeRegionReplicasColocated(BalancerClusterState c) {
return false;
}
- private String getBalanceReason(double total, double sumMultiplier) {
+ private String getBalanceReason(double total) {
if (total <= 0) {
- return "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)
= " + total + " <= 0";
- } else if (sumMultiplier <= 0) {
- return "sumMultiplier = " + sumMultiplier + " <= 0";
+ return "(weighted sum of imbalance = " + total + " <= 0";
} else if ((total / sumMultiplier) < minCostNeedBalance) {
- return
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier
= " +
- (total / sumMultiplier) + " <= minCostNeedBalance(" +
minCostNeedBalance + ")";
+ return "(weighted average imbalance = " +
+ (total / sumMultiplier) + " < threshold (" + minCostNeedBalance + ")";
Review comment:
Do we have a guard somewhere against all the multipliers being 0?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -423,8 +417,9 @@ private long calculateMaxSteps(BalancerClusterState
cluster) {
maxSteps);
}
}
- LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost
+ ", functionCost="
- + functionCost() + " computedMaxSteps: " + computedMaxSteps);
+ LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average
imbalance ="
+ + currentCost / sumMultiplier + ", functionCost=" + functionCost() + "
computedMaxSteps: "
+ + computedMaxSteps);
Review comment:
why is `currentCost` divided by `sumMultiplier` here? isn't
`currentCost` by itself already the weighted average measure of imbalance?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -319,38 +318,33 @@ boolean needsBalance(TableName tableName,
BalancerClusterState cluster) {
return true;
}
+ sumMultiplier = 0.0f;
double total = 0.0;
- float sumMultiplier = 0.0f;
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
- if (multiplier <= 0) {
- LOG.trace("{} not needed because multiplier is <= 0",
c.getClass().getSimpleName());
- continue;
- }
+ double cost = c.cost();
if (!c.isNeeded()) {
LOG.trace("{} not needed", c.getClass().getSimpleName());
continue;
}
+ total += cost * multiplier;
sumMultiplier += multiplier;
- total += c.cost() * multiplier;
}
- boolean balanced = total <= 0 || sumMultiplier <= 0 ||
- (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
+ boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+
if (balanced) {
final double calculatedTotal = total;
- final double calculatedMultiplier = sumMultiplier;
- sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal,
calculatedMultiplier),
- costFunctions);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to
need a balance is {}",
- balanced ? "Skipping load balancing because balanced" : "We need to
load balance",
- isByTable ? String.format("table (%s)", tableName) : "cluster",
- total, sumMultiplier, minCostNeedBalance);
- if (LOG.isTraceEnabled()) {
- LOG.trace("Balance decision detailed function costs={}",
functionCost());
- }
+ sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal),
costFunctions);
+ LOG.info("{} - skipping load balancing because weighted average
imbalance={} > threshold({})."
+ + "functionCost={}."
+ + "If you want more aggressive balancing, either lower
minCostNeedbalance {}"
+ + "or increase the relative weight(s) of the specific cost
function(s).",
+ isByTable ? "Table specific ("+tableName+")" : "Cluster wide",
functionCost(),
+ total / sumMultiplier, minCostNeedBalance);
Review comment:
When an operator sees this message and wants to "lower
minCostNeedBalance" how do they do that?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -470,17 +465,19 @@ private long calculateMaxSteps(BalancerClusterState
cluster) {
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
if (initCost > currentCost) {
List<RegionPlan> plans = createRegionPlans(cluster);
- LOG.info("Finished computing new load balance plan. Computation took {}"
+
- " to try {} different iterations. Found a solution that moves " +
- "{} regions; Going from a computed cost of {}" +
- " to a new cost of {}", java.time.Duration.ofMillis(endTime -
startTime),
- step, plans.size(), initCost, currentCost);
+ LOG.info("Finished computing new moving plan. Computation took {} ms" +
+ " to try {} different iterations. Found a solution that moves " +
+ "{} regions; Going from a computed imbalance of {}" +
+ " to a new imbalance of {}. ",
+ endTime - startTime, step, plans.size(), initCost / sumMultiplier,
currentCost / sumMultiplier);
sendRegionPlansToRingBuffer(plans, currentCost, initCost,
initFunctionTotalCosts, step);
+ LOG.info("Moving plan submitted. Balancer is going into sleep until next
period");
return plans;
}
- LOG.info("Could not find a better load balance plan. Tried {} different
configurations in " +
- "{}, and did not find anything with a computed cost less than {}", step,
- java.time.Duration.ofMillis(endTime - startTime), initCost);
+ LOG.info("Could not find a better moving plan. Tried {} different
configurations in " +
+ "{} ms, and did not find anything with an imbalance score less than
{}", step,
+ endTime - startTime, initCost / sumMultiplier);
+ LOG.info("Balancer is going into sleep until next period");
Review comment:
can we say in this message approx how long that will be?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
##########
@@ -423,8 +417,9 @@ private long calculateMaxSteps(BalancerClusterState
cluster) {
maxSteps);
}
}
- LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost
+ ", functionCost="
- + functionCost() + " computedMaxSteps: " + computedMaxSteps);
+ LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average
imbalance ="
+ + currentCost / sumMultiplier + ", functionCost=" + functionCost() + "
computedMaxSteps: "
+ + computedMaxSteps);
Review comment:
please use parameter substitution
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]