This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 6ab6d6f  HBASE-25973 Balancer should explain progress in a better way 
in log - backport branch-2 (#3485)
6ab6d6f is described below

commit 6ab6d6f2316daa7a547dbb7c09a6d0c8ce581c2d
Author: clarax <[email protected]>
AuthorDate: Fri Jul 16 15:20:14 2021 -0700

    HBASE-25973 Balancer should explain progress in a better way in log - 
backport branch-2 (#3485)
    
    
    Signed-off-by: stack <[email protected]>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |   2 +
 .../hbase/master/balancer/BaseLoadBalancer.java    |   2 +
 .../master/balancer/StochasticLoadBalancer.java    | 145 +++++++++++----------
 3 files changed, 80 insertions(+), 69 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 9086ad3..6d0d1ca 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1858,6 +1858,8 @@ public class HMaster extends HRegionServer implements 
MasterServices {
         }
       }
     }
+    LOG.info("Balancer is going into sleep until next period in {}ms", 
getConfiguration()
+      .getInt(HConstants.HBASE_BALANCER_PERIOD, 
HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
     return successRegionPlans;
   }
 
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 96902b7..2e55b9b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -74,6 +74,8 @@ import 
org.apache.hbase.thirdparty.com.google.common.collect.Sets;
  *
  */
 @InterfaceAudience.Private
[email protected](value="IS2_INCONSISTENT_SYNC",
+  justification="Complaint is about isByTable not being synchronized; we don't 
modify often")
 public abstract class BaseLoadBalancer implements LoadBalancer {
 
   public static final String BALANCER_DECISION_BUFFER_ENABLED =
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 20037c4..da51175 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -149,7 +149,8 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
 
   private List<CandidateGenerator> candidateGenerators;
   private List<CostFunction> costFunctions; // FindBugs: Wants this protected; 
IS2_INCONSISTENT_SYNC
-
+  // To save currently configed sum of multiplier. Defaulted at 1 for cases 
that carry high cost
+  private float sumMultiplier = 1.0f;
   // to save and report costs to JMX
   private double curOverallCost = 0d;
   private double[] tempFunctionCosts;
@@ -206,7 +207,6 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     }
     regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
     regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
-
     costFunctions = new ArrayList<>();
     addCostFunction(new RegionCountSkewCostFunction(conf));
     addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
@@ -327,63 +327,65 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
   protected boolean needsBalance(TableName tableName, Cluster cluster) {
     ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Not running balancer because only " + cs.getNumServers()
-            + " active regionserver(s)");
-      }
-      if (this.isBalancerRejectionRecording) {
-        sendRejectionReasonToRingBuffer("The number of RegionServers " +
-          cs.getNumServers() + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + 
")", null);
-      }
+      LOG.info("Not running balancer because only " + cs.getNumServers() +
+        " active regionserver(s)");
+      sendRejectionReasonToRingBuffer("The number of RegionServers " + 
cs.getNumServers() +
+        " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
       return false;
     }
     if (areSomeRegionReplicasColocated(cluster)) {
+      LOG.info("Running balancer because at least one server hosts replicas of 
the same region.");
       return true;
     }
 
     if (idleRegionServerExist(cluster)){
+      LOG.info("Running balancer because cluster has idle server(s).");
       return true;
     }
 
+    sumMultiplier = 0.0f;
     double total = 0.0;
-    float sumMultiplier = 0.0f;
     for (CostFunction c : costFunctions) {
       float multiplier = c.getMultiplier();
-      if (multiplier <= 0) {
-        LOG.trace("{} not needed because multiplier is <= 0", 
c.getClass().getSimpleName());
-        continue;
-      }
+      double cost = c.cost();
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
+      total += cost * multiplier;
       sumMultiplier += multiplier;
-      total += c.cost() * multiplier;
-    }
-
-    boolean balanced = total <= 0 || sumMultiplier <= 0 ||
-        (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
-    if(balanced && isBalancerRejectionRecording){
-      String reason = "";
-      if (total <= 0) {
-        reason = 
"(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + 
" <= 0";
-      } else if (sumMultiplier <= 0) {
-        reason = "sumMultiplier = " + sumMultiplier + " <= 0";
-      } else if ((total / sumMultiplier) < minCostNeedBalance) {
-        reason =
-          
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier
 = " + (total
-            / sumMultiplier) + " <= minCostNeedBalance(" + minCostNeedBalance 
+ ")";
-      }
-      sendRejectionReasonToRingBuffer(reason, costFunctions);
     }
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to 
need a balance is {}",
-          balanced ? "Skipping load balancing because balanced" : "We need to 
load balance",
-          isByTable ? String.format("table (%s)", tableName) : "cluster",
-          total, sumMultiplier, minCostNeedBalance);
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Balance decision detailed function costs={}", 
functionCost());
+    if (sumMultiplier <= 0) {
+      LOG.error("At least one cost function needs a multiplier > 0. For 
example, set "
+        + "hbase.master.balancer.stochastic.regionCountCost to a positive 
value or default");
+      return false;
+    }
+
+    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    if (balanced) {
+      if (isBalancerRejectionRecording) {
+        String reason = "";
+        if (total <= 0) {
+          reason =
+            "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = 
" + total + " <= 0";
+        } else if (sumMultiplier <= 0) {
+          reason = "sumMultiplier = " + sumMultiplier + " <= 0";
+        } else if ((total / sumMultiplier) < minCostNeedBalance) {
+          reason =
+            
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier
 = " + (
+              total / sumMultiplier) + " <= minCostNeedBalance(" + 
minCostNeedBalance + ")";
+        }
+        sendRejectionReasonToRingBuffer(reason, costFunctions);
       }
+      LOG.info("{} - skipping load balancing because weighted average 
imbalance={} <= " +
+          "threshold({}). If you want more aggressive balancing, either lower "
+          + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or 
increase the relative"
+          + " multiplier(s) of the specific cost function(s). functionCost={}",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", 
total / sumMultiplier,
+        minCostNeedBalance, minCostNeedBalance, functionCost());
+    } else {
+      LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", 
maxRunningTime);
     }
     return !balanced;
   }
@@ -419,8 +421,8 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     // Allow turning this feature off if the locality cost is not going to
     // be used in any computations.
     RegionLocationFinder finder = null;
-    if ((this.localityCost != null && this.localityCost.getMultiplier() > 0)
-        || (this.rackLocalityCost != null && 
this.rackLocalityCost.getMultiplier() > 0)) {
+    if ((this.localityCost != null && this.localityCost.getMultiplier() > 0) 
|| (
+      this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 
0)) {
       finder = this.regionFinder;
     }
 
@@ -446,21 +448,22 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     long computedMaxSteps;
     if (runMaxSteps) {
       computedMaxSteps = Math.max(this.maxSteps,
-          ((long)cluster.numRegions * (long)this.stepsPerRegion * 
(long)cluster.numServers));
+        ((long) cluster.numRegions * (long) this.stepsPerRegion * (long) 
cluster.numServers));
     } else {
-      long calculatedMaxSteps = (long)cluster.numRegions * 
(long)this.stepsPerRegion *
-          (long)cluster.numServers;
+      long calculatedMaxSteps =
+        (long) cluster.numRegions * (long) this.stepsPerRegion * (long) 
cluster.numServers;
       computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps);
       if (calculatedMaxSteps > maxSteps) {
-        LOG.warn("calculatedMaxSteps:{} for loadbalancer's stochastic walk is 
larger than "
-            + "maxSteps:{}. Hence load balancing may not work well. Setting 
parameter "
-            + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can 
overcome this issue."
-            + "(This config change does not require service restart)", 
calculatedMaxSteps,
-            maxSteps);
+        LOG.warn("calculatedMaxSteps:{} for loadbalancer's stochastic walk is 
larger than " +
+            "maxSteps:{}. Hence load balancing may not work well. Setting 
parameter " +
+            "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can 
overcome this issue." +
+            "(This config change does not require service restart)", 
calculatedMaxSteps,
+          maxSteps);
       }
     }
-    LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost 
+ ", functionCost="
-        + functionCost() + " computedMaxSteps: " + computedMaxSteps);
+    LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average 
imbalance={}," +
+        " functionCost={} computedMaxSteps={}",
+      currentCost / sumMultiplier, functionCost(), computedMaxSteps);
 
     final String initFunctionTotalCosts = totalCostsPerFunc();
     // Perform a stochastic walk to see if we can get a good fit.
@@ -493,8 +496,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
         updateCostsWithAction(cluster, undoAction);
       }
 
-      if (EnvironmentEdgeManager.currentTime() - startTime >
-          maxRunningTime) {
+      if (EnvironmentEdgeManager.currentTime() - startTime > maxRunningTime) {
         break;
       }
     }
@@ -506,17 +508,18 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
     if (initCost > currentCost) {
       plans = createRegionPlans(cluster);
-      LOG.info("Finished computing new load balance plan. Computation took {}" 
+
-        " to try {} different iterations.  Found a solution that moves " +
-        "{} regions; Going from a computed cost of {}" +
-        " to a new cost of {}", java.time.Duration.ofMillis(endTime - 
startTime),
-        step, plans.size(), initCost, currentCost);
+      LOG.info("Finished computing new moving plan. Computation took {} ms" +
+          " to try {} different iterations.  Found a solution that moves " +
+          "{} regions; Going from a computed imbalance of {}" + " to a new 
imbalance of {}. ",
+        endTime - startTime, step, plans.size(), initCost / sumMultiplier,
+        currentCost / sumMultiplier);
+
       sendRegionPlansToRingBuffer(plans, currentCost, initCost, 
initFunctionTotalCosts, step);
       return plans;
     }
-    LOG.info("Could not find a better load balance plan.  Tried {} different 
configurations in " +
-      "{}, and did not find anything with a computed cost less than {}", step,
-      java.time.Duration.ofMillis(endTime - startTime), initCost);
+    LOG.info("Could not find a better moving plan.  Tried {} different 
configurations in "
+        + "{} ms, and did not find anything with an imbalance score less than 
{}", step,
+      endTime - startTime, initCost / sumMultiplier);
     return null;
   }
 
@@ -527,8 +530,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
         .setReason(reason);
       if (costFunctions != null) {
         for (CostFunction c : costFunctions) {
-          float multiplier = c.getMultiplier();
-          if (multiplier <= 0 || !c.isNeeded()) {
+          if (!c.isNeeded()) {
             continue;
           }
           builder.addCostFuncInfo(c.getClass().getName(), c.cost(), 
c.getMultiplier());
@@ -587,7 +589,8 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
   }
 
   private void addCostFunction(CostFunction costFunction) {
-    if (costFunction.getMultiplier() > 0) {
+    float multiplier = costFunction.getMultiplier();
+    if (multiplier > 0) {
       costFunctions.add(costFunction);
     }
   }
@@ -598,9 +601,13 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
       builder.append(c.getClass().getSimpleName());
       builder.append(" : (");
       if (c.isNeeded()) {
-        builder.append(c.getMultiplier());
+        builder.append("multiplier=" + c.getMultiplier());
         builder.append(", ");
-        builder.append(c.cost());
+        double cost = c.cost();
+        builder.append("imbalance=" + cost);
+        if (cost < minCostNeedBalance) {
+          builder.append(", balanced");
+        }
       } else {
         builder.append("not needed");
       }
@@ -612,7 +619,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
   private String totalCostsPerFunc() {
     StringBuilder builder = new StringBuilder();
     for (CostFunction c : costFunctions) {
-      if (c.getMultiplier() <= 0 || !c.isNeeded()) {
+      if (!c.isNeeded()) {
         continue;
       }
       double cost = c.getMultiplier() * c.cost();
@@ -696,7 +703,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
   void updateCostsWithAction(Cluster cluster, Action action) {
     for (CostFunction c : costFunctions) {
-      if (c.getMultiplier() > 0 && c.isNeeded()) {
+      if (c.isNeeded()) {
         c.postAction(action);
       }
     }
@@ -735,7 +742,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
       CostFunction c = costFunctions.get(i);
       this.tempFunctionCosts[i] = 0.0;
 
-      if (c.getMultiplier() <= 0 || !c.isNeeded()) {
+      if (!c.isNeeded()) {
         continue;
       }
 

Reply via email to