rmdmattingly commented on code in PR #6593:
URL: https://github.com/apache/hbase/pull/6593#discussion_r1909330627


##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
       return true;
     }
 
-    if (sloppyRegionServerExist(cs)) {
+    if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) {
       LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}",
         functionCost());
       return true;
     }
 
+    if (balancerConditionals.shouldRunBalancer(cluster)) {
+      LOG.info("Running balancer because conditional candidate generators have important moves");
+      return true;
+    }
+
     double total = 0.0;
+    double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point
     for (CostFunction c : costFunctions) {
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
       total += c.cost() * c.getMultiplier();
+      multiplierTotal += c.getMultiplier();
     }
-    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    boolean balanced = (total / multiplierTotal < minCostNeedBalance);
 
     if (balanced) {
       final double calculatedTotal = total;
       sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier),
         costFunctions);
       LOG.info(
         "{} - skipping load balancing because weighted average imbalance={} <= "
-          + "threshold({}). If you want more aggressive balancing, either lower "
+          + "threshold({}) and conditionals do not have opinionated move candidates. "
+          + "consecutive balancer runs. If you want more aggressive balancing, either lower "
           + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
          + "multiplier(s) of the specific cost function(s). functionCost={}",
         isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier,
         minCostNeedBalance, minCostNeedBalance, functionCost());
     } else {
-      LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
-        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime);
+      LOG.info(
+        "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total,
+        minCostNeedBalance);
     }
     return !balanced;
   }
 
   @RestrictedApi(explanation = "Should only be called in tests", link = "",
       allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
-  BalanceAction nextAction(BalancerClusterState cluster) {
-    return getRandomGenerator().generate(cluster);
+  Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) {
+    CandidateGenerator generator = getRandomGenerator(cluster);
+    return Pair.newPair(generator, generator.generate(cluster));

Review Comment:
   Returning a pair here allows for better logging of generator activity, which can be nice when debugging the balancer's decision-making.
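   For illustration, a caller could consume the pair roughly like this (a sketch only; the loop context and log wording are illustrative, while `Pair.getFirst()`/`getSecond()` are the standard HBase `Pair` accessors):

```java
// Sketch: log which generator proposed which action while computing a plan
Pair<CandidateGenerator, BalanceAction> next = nextAction(cluster);
CandidateGenerator generator = next.getFirst();
BalanceAction action = next.getSecond();
if (LOG.isDebugEnabled()) {
  LOG.debug("Generator {} proposed {}", generator.getClass().getSimpleName(), action);
}
```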



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -372,70 +407,108 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
       return true;
     }
 
-    if (sloppyRegionServerExist(cs)) {
+    if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) {
       LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}",
         functionCost());
       return true;
     }
 
+    if (balancerConditionals.shouldRunBalancer(cluster)) {
+      LOG.info("Running balancer because conditional candidate generators have important moves");
+      return true;
+    }
+
     double total = 0.0;
+    double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point
     for (CostFunction c : costFunctions) {
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
       total += c.cost() * c.getMultiplier();
+      multiplierTotal += c.getMultiplier();
     }
-    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    boolean balanced = (total / multiplierTotal < minCostNeedBalance);
 
     if (balanced) {
       final double calculatedTotal = total;
       sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier),
         costFunctions);
       LOG.info(
         "{} - skipping load balancing because weighted average imbalance={} <= "
-          + "threshold({}). If you want more aggressive balancing, either lower "
+          + "threshold({}) and conditionals do not have opinionated move candidates. "
+          + "consecutive balancer runs. If you want more aggressive balancing, either lower "
           + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
           + "multiplier(s) of the specific cost function(s). functionCost={}",
         isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier,
         minCostNeedBalance, minCostNeedBalance, functionCost());
     } else {
-      LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
-        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime);
+      LOG.info(
+        "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total,
+        minCostNeedBalance);
     }
     return !balanced;
   }
 
   @RestrictedApi(explanation = "Should only be called in tests", link = "",
       allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
-  BalanceAction nextAction(BalancerClusterState cluster) {
-    return getRandomGenerator().generate(cluster);
+  Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) {
+    CandidateGenerator generator = getRandomGenerator(cluster);
+    return Pair.newPair(generator, generator.generate(cluster));
   }
 
   /**
    * Select the candidate generator to use based on the cost of cost functions. The chance of
    * selecting a candidate generator is propotional to the share of cost of all cost functions among
    * all cost functions that benefit from it.
    */
-  protected CandidateGenerator getRandomGenerator() {

Review Comment:
   I had to rewrite this method, largely because of the refactor to a Map of generators and their weights rather than lists. The rewrite also fixes a bug: the old method modified the weightsOfGenerators array in place. That is:
   
   1. It first computes a running sum in weightsOfGenerators[i].
   2. It then divides those values by the total to get a cumulative distribution.
   3. It uses this (mutated) array to select a generator.
   4. But the original weights are overwritten and never restored.
   
   As a result, the old method could produce an unfair distribution of generator activity. Maybe in practice it was okay because we called `updateCostsAndWeightsWithAction` frequently enough, and that implicitly reset the weights. But I think in reality we frequently ran into NULL_ACTIONs that manifested as no-ops, and those skewed the generator weights. Either way, the old approach was fragile and buggy, and selection should be fairer now.
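   For illustration, a selection routine that avoids the in-place mutation could look roughly like the sketch below. This is not the PR's exact code; the method and variable names are placeholders, and the point is only that the cumulative distribution lives in locals while the stored weights stay untouched.

```java
// Sketch: weight-proportional generator selection without mutating the weights map
static CandidateGenerator pickWeighted(
  Map<Class<? extends CandidateGenerator>, Double> weights,
  Map<Class<? extends CandidateGenerator>, CandidateGenerator> generators, Random random) {
  double total = weights.values().stream().mapToDouble(Double::doubleValue).sum();
  if (total <= 0) {
    // Nothing carries weight right now; fall back to a uniform pick
    List<CandidateGenerator> all = new ArrayList<>(generators.values());
    return all.get(random.nextInt(all.size()));
  }
  double roll = random.nextDouble() * total;
  double cumulative = 0;
  CandidateGenerator chosen = null;
  for (Map.Entry<Class<? extends CandidateGenerator>, Double> entry : weights.entrySet()) {
    chosen = generators.get(entry.getKey());
    cumulative += entry.getValue();
    if (roll <= cumulative) {
      break;
    }
  }
  return chosen;
}
```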



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java:
##########
@@ -122,6 +129,14 @@ class BalancerClusterState {
   // Maps regionName -> oldServerName -> cache ratio of the region on the old server
   Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap;
 
+  private Supplier<List<Integer>> shuffledServerIndicesSupplier =
+    Suppliers.memoizeWithExpiration(() -> {
+      Collection<Integer> serverIndices = serversToIndex.values();
+      List<Integer> shuffledServerIndices = new ArrayList<>(serverIndices);
+      Collections.shuffle(shuffledServerIndices);
+      return shuffledServerIndices;
+    }, 5, TimeUnit.SECONDS);

Review Comment:
   I often found it was nice to let opinionated candidate generators iterate through servers in a different order on subsequent runs, so this memoized supplier saves some cycles for the generators that want that behavior (replica distribution and table isolation both rely on it at the moment).
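   For illustration, an opinionated generator could consume it roughly like this (the accessor name `getShuffledServerIndices()` and the `serverHasRoom(...)` helper are assumptions for the sketch, not necessarily the PR's exact API):

```java
// Sketch: walk servers in the cached, periodically reshuffled order and take the
// first viable destination for the region under consideration
for (int serverIndex : cluster.getShuffledServerIndices()) {
  if (serverHasRoom(cluster, serverIndex, regionIndex)) {
    return serverIndex;
  }
}
return -1; // no viable destination found this round
```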



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java:
##########
@@ -899,6 +970,48 @@ int[] addRegion(int[] regions, int regionIndex) {
     return newRegions;
   }
 
+  int[] removeRegions(int[] regions, Set<Integer> regionIndicesToRemove) {
+    // Calculate the size of the new regions array
+    int newSize = regions.length - regionIndicesToRemove.size();
+    if (newSize < 0) {
+      throw new IllegalStateException(
+        "Region indices mismatch: more regions to remove than in the regions 
array");
+    }
+
+    int[] newRegions = new int[newSize];
+    int newIndex = 0;
+
+    // Copy only the regions not in the removal set
+    for (int region : regions) {
+      if (!regionIndicesToRemove.contains(region)) {
+        newRegions[newIndex++] = region;
+      }
+    }
+
+    // If the newIndex is smaller than newSize, some regions were missing from the input array
+    if (newIndex != newSize) {
+      throw new IllegalStateException("Region indices mismatch: some regions in the removal "
+        + "set were not found in the regions array");
+    }
+
+    return newRegions;
+  }
+
+  int[] addRegions(int[] regions, Set<Integer> regionIndicesToAdd) {
+    int[] newRegions = new int[regions.length + regionIndicesToAdd.size()];
+
+    // Copy the existing regions to the new array
+    System.arraycopy(regions, 0, newRegions, 0, regions.length);
+
+    // Add the new regions at the end of the array
+    int newIndex = regions.length;
+    for (int regionIndex : regionIndicesToAdd) {
+      newRegions[newIndex++] = regionIndex;
+    }
+
+    return newRegions;
+  }

Review Comment:
   These methods make it easier to support the MoveBatch action, which may remove several regions from, or add several regions to, a single server.
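   For illustration, applying one leg of a batched move with these helpers might look like this (the server variables and region indices are illustrative; `regionsPerServer` is the existing per-server array of region indices):

```java
// Sketch: move a set of regions from one server's array to another's
Set<Integer> moving = new HashSet<>(Arrays.asList(12, 47, 93));
regionsPerServer[sourceServer] = removeRegions(regionsPerServer[sourceServer], moving);
regionsPerServer[destinationServer] = addRegions(regionsPerServer[destinationServer], moving);
```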



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java:
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public abstract class RegionPlanConditional {

Review Comment:
   The RegionPlanConditional is a deliberately restrictive, but still powerful, 
view into the implications of a move. Each RegionPlanConditional should 
implement `isViolatingServer(RegionPlan regionPlan, Set<RegionInfo> 
destinationRegions)`, `isViolatingHost`, and `isViolatingRack` as necessary to 
ensure that RegionPlans don't conflict with its intentions for the given 
destination, based on the regions already on said destination.
   
   To help ensure that we always find a path to success, RegionPlanConditionals 
can also provide a List of specialized CandidateGenerators that will be 
prioritized over our OOTB generators. This ensures that, even as clusters 
scale, we aren't relying on randomness to _eventually_ lead us to an ideal 
solution.
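   For illustration, a minimal conditional might look roughly like the sketch below. The example rule (don't put two regions of the same table on one server) is made up, and the exact abstract surface of `RegionPlanConditional` (constructors, other required overrides) may differ from this sketch.

```java
// Sketch: flag a plan as violating if the destination already hosts a region of the same table
public class SingleRegionPerTableConditional extends RegionPlanConditional {

  @Override
  public boolean isViolatingServer(RegionPlan plan, Set<RegionInfo> destinationRegions) {
    TableName movingTable = plan.getRegionInfo().getTable();
    for (RegionInfo onDestination : destinationRegions) {
      if (onDestination.getTable().equals(movingTable)) {
        return true; // destination already serves this table
      }
    }
    return false;
  }
}
```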



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
       return true;
     }
 
-    if (sloppyRegionServerExist(cs)) {
+    if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) {
       LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}",
         functionCost());
       return true;
     }
 
+    if (balancerConditionals.shouldRunBalancer(cluster)) {
+      LOG.info("Running balancer because conditional candidate generators have important moves");
+      return true;
+    }
+
     double total = 0.0;
+    double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point
     for (CostFunction c : costFunctions) {
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
       total += c.cost() * c.getMultiplier();
+      multiplierTotal += c.getMultiplier();
     }
-    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    boolean balanced = (total / multiplierTotal < minCostNeedBalance);

Review Comment:
   Another bug I found is that `sumMultiplier` might not be initialized at this point. As a result, we should just build the multiplier total on the fly as we iterate through the cost functions anyway. This ensures that our `boolean balanced` derivation always has the necessary inputs available.



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableColocationCandidateGenerator.java:
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This will generate candidates that colocate a table on the number of RegionServers equal to its
+ * number of replicas. For example, this is useful when isolating system tables.
+ */
+@InterfaceAudience.Private
+public final class TableColocationCandidateGenerator

Review Comment:
   System table isolation is one thing, but we also want colocation of any tables that we're isolating. For example, if we have 10 system table regions that we've isolated and they're on 10 different servers, then it feels wasteful to have 10 servers each dedicated to hosting only 1 region of said system table. This generator ensures that we smartly find our way to a plan that colocates the given system table on one server (or _n_ servers, if the table has >1 replica).
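   For illustration, the core of such a colocation move could be built roughly as below (this is not the PR's generator; `pickColocationTarget(...)` is a hypothetical helper, and only the single-replica case is shown for simplicity):

```java
// Sketch: batch-move every region of the isolated table onto one chosen target server
int target = pickColocationTarget(cluster, isolatedTableRegionIndices);
List<MoveRegionAction> moves = new ArrayList<>();
for (int regionIndex : isolatedTableRegionIndices) {
  int currentServer = cluster.regionIndexToServerIndex[regionIndex];
  if (currentServer != target) {
    moves.add(new MoveRegionAction(regionIndex, currentServer, target));
  }
}
// The resulting moves could then be wrapped in a single MOVE_BATCH action
```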



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java:
##########
@@ -28,6 +28,7 @@ enum Type {
     ASSIGN_REGION,
     MOVE_REGION,
     SWAP_REGIONS,
+    MOVE_BATCH,

Review Comment:
   Conditional candidate generators might need to make a series of moves, depending on how complex the path to success is. This new action type lets a generator string several moves together into a single action.
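   For illustration, one plausible shape for the new action (not necessarily the PR's exact class) is simply a carrier for an ordered list of single-region moves:

```java
// Sketch: a MOVE_BATCH action that bundles several MoveRegionActions together
class MoveBatchAction extends BalanceAction {
  private final List<MoveRegionAction> moveActions;

  MoveBatchAction(List<MoveRegionAction> moveActions) {
    super(Type.MOVE_BATCH);
    this.moveActions = moveActions;
  }

  List<MoveRegionAction> getMoveActions() {
    return moveActions;
  }
}
```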



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java:
##########
@@ -705,7 +720,41 @@ enum LocalityType {
     RACK
   }
 
-  public void doAction(BalanceAction action) {
+  public List<RegionPlan> convertActionToPlans(BalanceAction action) {

Review Comment:
   It's easier to reason about what a RegionPlan is than what a BalanceAction is. So the new conditionals are intentionally RegionPlanConditionals that work only with RegionPlans as the unified representation of what any BalanceAction might want to do.
   
   As a result, I needed a way to transform BalanceActions into RegionPlans _without_ running the action against the BCS.
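   For illustration, the single-region case amounts to resolving the action's integer indices against the cluster state's arrays (the accessor names on `MoveRegionAction` are assumed for the sketch):

```java
// Sketch: translate a MoveRegionAction into a RegionPlan without applying it to the BCS
private RegionPlan toRegionPlan(BalancerClusterState cluster, MoveRegionAction move) {
  RegionInfo region = cluster.regions[move.getRegion()];
  ServerName source = cluster.servers[move.getFromServer()];
  ServerName destination = cluster.servers[move.getToServer()];
  return new RegionPlan(region, source, destination);
}
```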



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java:
##########
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.lang.reflect.Constructor;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
+
+/**
+ * Balancer conditionals supplement cost functions in the {@link StochasticLoadBalancer}. Cost
+ * functions are insufficient and difficult to work with when making discrete decisions; this is
+ * because they operate on a continuous scale, and each cost function's multiplier affects the
+ * relative importance of every other cost function. So it is difficult to meaningfully and clearly
+ * value many aspects of your region distribution via cost functions alone. Conditionals allow you
+ * to very clearly define discrete rules that your balancer would ideally follow. To clarify, a
+ * conditional violation will not block a region assignment because we would prefer to have uptime
+ * than have perfectly intentional balance. But conditionals allow you to, for example, define that
+ * a region's primary and secondary should not live on the same rack. Another example, conditionals
+ * make it easy to define that system tables will ideally be isolated on their own RegionServer
+ * (without needing to manage distinct RegionServer groups). Use of conditionals may cause an
+ * extremely unbalanced cluster to exceed its max balancer runtime. This is necessary because
+ * conditional candidate generation is quite expensive, and cutting it off early could prevent us
+ * from finding a solution.
+ */
+@InterfaceAudience.Private
+final class BalancerConditionals implements Configurable {

Review Comment:
   This class is statically available via the package-private `BalancerConditionals.INSTANCE`, and it exists to be a unified interface into all of the conditionals configured for the current balancer run.



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -157,14 +160,17 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
   private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
   private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
 
-  protected List<CandidateGenerator> candidateGenerators;
+  protected Map<Class<? extends CandidateGenerator>, CandidateGenerator> candidateGenerators;
+  private Map<Class<? extends CandidateGenerator>, Double> weightsOfGenerators;
+  private final Supplier<List<Class<? extends CandidateGenerator>>> shuffledGeneratorClasses =
+    Suppliers.memoizeWithExpiration(() -> {
+      List<Class<? extends CandidateGenerator>> shuffled =
+        new ArrayList<>(candidateGenerators.keySet());
+      Collections.shuffle(shuffled);
+      return shuffled;
+    }, 5, TimeUnit.SECONDS);

Review Comment:
   While working on this, I realized that we were biased towards certain candidate generators. Reshuffling the generator order is now possible because I've removed our reliance on enum ordinals, and it helps provide a more genuinely stochastic approach when we aren't relying on conditional candidate generators.



##########
hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java:
##########
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY;
+import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public final class CandidateGeneratorTestUtil {

Review Comment:
   The rest of the changes here, probably at least half of this PR, are tests. I've set up minicluster tests to validate that clusters start up and balance as intended with conditionals enabled, and I've set up simpler tests that validate the StochasticLoadBalancer's ability to get out of very hairy, high-scale imbalances via conditionals and their generators. I've also set up tests that validate a wide variety of conditional usage, and that all conditionals implemented in this PR play nicely with one another.



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -213,16 +219,18 @@ private void loadCustomCostFunctions(Configuration conf) {
 
   @RestrictedApi(explanation = "Should only be called in tests", link = "",
       allowedOnPath = ".*/src/test/.*")
-  List<CandidateGenerator> getCandidateGenerators() {
+  Map<Class<? extends CandidateGenerator>, CandidateGenerator> getCandidateGenerators() {

Review Comment:
   As mentioned above, I moved away from the system of candidate generators ordered by the GeneratorType enum ordinal because I thought that was a fragile setup, and because it was necessary in order to provide fairer generator usage.
   
   This Map approach would also allow us to offer pluggable candidate generators via some `...additionalGenerators` config. I think that could be a nice way to let users write some custom balancer code without the complexities associated with designing cost functions or building their own custom balancer. That said, I've not included the pluggable generator idea in this PR.
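   For illustration, the pluggable idea could be as simple as the sketch below; the config key is hypothetical and none of this is in the PR:

```java
// Sketch: reflectively load user-supplied generators into the generator map
String key = "hbase.master.balancer.stochastic.additionalCandidateGenerators"; // hypothetical
for (Class<?> clazz : conf.getClasses(key)) {
  if (CandidateGenerator.class.isAssignableFrom(clazz)) {
    CandidateGenerator generator =
      (CandidateGenerator) org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
    candidateGenerators.put(generator.getClass(), generator);
  }
}
```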



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -332,8 +342,33 @@ void updateMetricsSize(int size) {
   }
 
   private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
+    if (!c.hasRegionReplicas || balancerConditionals.isReplicaDistributionEnabled()) {
+      // This check is unnecessary without replicas, or with conditional replica distribution
+      // The balancer will auto-run if conditional replica distribution candidates are available
+      return false;
+    }
+
+    // Check host
     regionReplicaHostCostFunction.prepare(c);
-    return (Math.abs(regionReplicaHostCostFunction.cost()) > CostFunction.COST_EPSILON);
+    double hostCost = Math.abs(regionReplicaHostCostFunction.cost());
+    boolean colocatedAtHost = hostCost > CostFunction.COST_EPSILON;
+    if (colocatedAtHost) {
+      return true;
+    }
+    LOG.trace("No host colocation detected with host cost={}", hostCost);
+
+    // Check rack
+    regionReplicaRackCostFunction.prepare(c);
+    double rackCost = Math.abs(regionReplicaRackCostFunction.cost());
+    boolean colocatedAtRack =
+      (Math.abs(regionReplicaRackCostFunction.cost()) > CostFunction.COST_EPSILON);
+    if (colocatedAtRack) {
+      return true;
+    }
+    LOG.trace("No rack colocation detected with rack cost={}", rackCost);
+
+    return DistributeReplicasCandidateGenerator.INSTANCE.generateCandidate(c, true)
+        != BalanceAction.NULL_ACTION;
   }

Review Comment:
   This method was never great: it only checked for colocation on hosts and never checked racks. I've fixed that here, while also making it a no-op when conditional-based replica distribution is in use (which makes this check unnecessary, since the balancer will always run if conditional candidate generators have work to do).



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
       return true;
     }
 
-    if (sloppyRegionServerExist(cs)) {
+    if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) {
       LOG.info("Running balancer because cluster has sloppy server(s)." + " function cost={}",
         functionCost());
       return true;
     }
 
+    if (balancerConditionals.shouldRunBalancer(cluster)) {
+      LOG.info("Running balancer because conditional candidate generators have important moves");
+      return true;
+    }
+
     double total = 0.0;
+    double multiplierTotal = 0; // this.sumMultiplier is not necessarily initialized at this point
     for (CostFunction c : costFunctions) {
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
       total += c.cost() * c.getMultiplier();
+      multiplierTotal += c.getMultiplier();
     }
-    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    boolean balanced = (total / multiplierTotal < minCostNeedBalance);
 
     if (balanced) {
       final double calculatedTotal = total;
       sendRejectionReasonToRingBuffer(() -> getBalanceReason(calculatedTotal, sumMultiplier),
         costFunctions);
       LOG.info(
         "{} - skipping load balancing because weighted average imbalance={} <= "
-          + "threshold({}). If you want more aggressive balancing, either lower "
+          + "threshold({}) and conditionals do not have opinionated move candidates. "
+          + "consecutive balancer runs. If you want more aggressive balancing, either lower "
           + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
           + "multiplier(s) of the specific cost function(s). functionCost={}",
         isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier,
         minCostNeedBalance, minCostNeedBalance, functionCost());
     } else {
-      LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
-        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime);
+      LOG.info(
+        "{} - Calculating plan. may take up to {}ms to complete. currentCost={}, targetCost={}",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", maxRunningTime, total,
+        minCostNeedBalance);
     }
     return !balanced;
   }
 
   @RestrictedApi(explanation = "Should only be called in tests", link = "",
       allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
-  BalanceAction nextAction(BalancerClusterState cluster) {
-    return getRandomGenerator().generate(cluster);
+  Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster) {
+    CandidateGenerator generator = getRandomGenerator(cluster);
+    return Pair.newPair(generator, generator.generate(cluster));
   }
 
   /**
    * Select the candidate generator to use based on the cost of cost functions. The chance of
    * selecting a candidate generator is propotional to the share of cost of all cost functions among
    * all cost functions that benefit from it.
    */
-  protected CandidateGenerator getRandomGenerator() {
-    double sum = 0;
-    for (int i = 0; i < weightsOfGenerators.length; i++) {
-      sum += weightsOfGenerators[i];
-      weightsOfGenerators[i] = sum;
+  protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) {
+    // Prefer conditional generators if they have moves to make
+    if (balancerConditionals.isConditionalBalancingEnabled()) {
+      for (RegionPlanConditional conditional : balancerConditionals.getConditionals()) {
+        List<RegionPlanConditionalCandidateGenerator> generators =
+          conditional.getCandidateGenerators();
+        for (RegionPlanConditionalCandidateGenerator generator : generators) {
+          if (generator.getWeight(cluster) > 0) {
+            return generator;
+          }
+        }
+      }
+    }

Review Comment:
   We will always prefer conditional generator moves if they exist



##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java:
##########
@@ -372,70 +407,125 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
       return true;
     }
 
-    if (sloppyRegionServerExist(cs)) {
+    if (!balancerConditionals.shouldSkipSloppyServerEvaluation() && sloppyRegionServerExist(cs)) {

Review Comment:
   Because conditional balancing may deliberately produce sloppy servers that 
we have no intention of, or path for, fixing, I've opted to disable this 
trigger when conditional balancing is enabled. I think this is an appropriate 
tradeoff



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
