This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new 67785255bd0 HBASE-30135: Improve CacheAwareLoadBalancer to simulate low cache ratio regions as cached in candidate servers with enough cache space (#8221) (#8250)
67785255bd0 is described below

commit 67785255bd0b0e37ed324f46ebabb50c380c3887
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon May 18 17:26:44 2026 +0100

    HBASE-30135: Improve CacheAwareLoadBalancer to simulate low cache ratio regions as cached in candidate servers with enough cache space (#8221) (#8250)
    
    Signed-off-by: Peter Somogyi <[email protected]>
    Change-Id: Ib72f86a0acab9f16da7be2ac7a238693cf41e07d
---
 .../master/balancer/BalancerClusterState.java      |  38 ++++--
 .../hbase/master/balancer/BalancerRegionLoad.java  |   6 +
 .../master/balancer/CacheAwareLoadBalancer.java    | 152 ++++++++++++++++-----
 .../master/balancer/StochasticLoadBalancer.java    |   9 ++
 .../balancer/TestCacheAwareLoadBalancer.java       |  66 +++++++++
 .../TestCacheAwareLoadBalancerCostFunctions.java   |  14 +-
 6 files changed, 234 insertions(+), 51 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 7061a3882e8..6a5237dd4c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -518,7 +518,29 @@ class BalancerClusterState {
     if (load == null) {
       return 0;
     }
-    return regionLoads[region].getLast().getStorefileSizeMB();
+    return load.getLast().getStorefileSizeMB();
+  }
+
+  /**
+   * Finds and return the sum of latest reported cache ratio and cold data 
ratio for the region on
+   * the RegionServer it's currently online.
+   */
+  float getSumRegionCacheAndColdDataRatio(int region) {
+    Deque<BalancerRegionLoad> dq = regionLoads[region];
+    if (dq == null || dq.isEmpty()) {
+      return 0.0f;
+    }
+    BalancerRegionLoad load = dq.getLast();
+    return load.getCurrentRegionCacheRatio() + load.getRegionColdDataRatio();
+  }
+
+  int getRegionSizeMinusColdDataMB(int region) {
+    Deque<BalancerRegionLoad> dq = regionLoads[region];
+    if (dq == null || dq.isEmpty()) {
+      return 0;
+    }
+    BalancerRegionLoad load = dq.getLast();
+    return load.getRegionSizeMB() - (int) (load.getRegionSizeMB() * 
load.getRegionColdDataRatio());
   }
 
   /**
@@ -563,23 +585,11 @@ class BalancerClusterState {
 
   }
 
-  /**
-   * Returns the size of hFiles from the most recent RegionLoad for region
-   */
-  public int getTotalRegionHFileSizeMB(int region) {
-    Deque<BalancerRegionLoad> load = regionLoads[region];
-    if (load == null) {
-      // This means, that the region has no actual data on disk
-      return 0;
-    }
-    return regionLoads[region].getLast().getRegionSizeMB();
-  }
-
   /**
    * Returns the weighted cache ratio of a region on the given region server
    */
   public float getOrComputeWeightedRegionCacheRatio(int region, int server) {
-    return getTotalRegionHFileSizeMB(region) * 
getOrComputeRegionCacheRatio(region, server);
+    return getRegionSizeMinusColdDataMB(region) * 
getOrComputeRegionCacheRatio(region, server);
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
index 2a4f5977e00..f6c0e1c3ffc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
@@ -35,6 +35,7 @@ class BalancerRegionLoad {
   private final int storefileSizeMB;
   private final int regionSizeMB;
   private final float currentRegionPrefetchRatio;
+  private final float regionColdDataRatio;
 
   BalancerRegionLoad(RegionMetrics regionMetrics) {
     readRequestsCount = regionMetrics.getReadRequestCount();
@@ -43,6 +44,7 @@ class BalancerRegionLoad {
     storefileSizeMB = (int) 
regionMetrics.getStoreFileSize().get(Size.Unit.MEGABYTE);
     regionSizeMB = (int) 
regionMetrics.getRegionSizeMB().get(Size.Unit.MEGABYTE);
     currentRegionPrefetchRatio = regionMetrics.getCurrentRegionCachedRatio();
+    regionColdDataRatio = regionMetrics.getCurrentRegionColdDataRatio();
   }
 
   public long getReadRequestsCount() {
@@ -68,4 +70,8 @@ class BalancerRegionLoad {
   public float getCurrentRegionCacheRatio() {
     return currentRegionPrefetchRatio;
   }
+
+  public float getRegionColdDataRatio() {
+    return regionColdDataRatio;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
index 9e0ad64532b..7796a499030 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
@@ -28,6 +28,7 @@ package org.apache.hadoop.hbase.master.balancer;
 
 import static 
org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY;
 
+import java.math.BigDecimal;
 import java.text.DecimalFormat;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -37,6 +38,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import org.apache.hadoop.conf.Configuration;
@@ -106,6 +108,12 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
     CACHE_RATIO
   }
 
+  private float lowCacheRatioThreshold;
+  private float potentialCacheRatioAfterMove;
+  private float minFreeCacheSpaceFactor;
+
+  private BigDecimal simulatedRatio = BigDecimal.ZERO;
+
   @Override
   public void loadConf(Configuration configuration) {
     // If balance is running, store configuration in pendingConfiguration and 
return immediately.
@@ -127,6 +135,12 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
     ratioThreshold =
       this.configuration.getFloat(CACHE_RATIO_THRESHOLD, 
CACHE_RATIO_THRESHOLD_DEFAULT);
     sleepTime = configuration.getLong(MOVE_THROTTLING, 
MOVE_THROTTLING_DEFAULT.toMillis());
+    lowCacheRatioThreshold = 
configuration.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY,
+      LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+    potentialCacheRatioAfterMove = 
configuration.getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY,
+      POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT);
+    minFreeCacheSpaceFactor =
+      configuration.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, 
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
   }
 
   /**
@@ -250,15 +264,13 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
             int regionSizeMB =
               
regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond();
             // The coldDataSize accounts for data size classified as "cold" by 
DataTieringManager,
-            // which should be kept out of cache. We sum cold region size in 
the cache ratio, as we
+            // which should be kept out of cache. We calculate cache ratio on 
old server based
+            // only on the hot data size for the region (regionSizeMB - 
coldDataSize), as we
             // don't want to move regions with low cache ratio due to data 
classified as cold.
-            float regionCacheRatioOnOldServer =
-              regionSizeMB
-                  == 0
-                    ? 0.0f
-                    : (float) (regionSizeInCache
-                      + 
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0))
-                      / regionSizeMB;
+            int coldDataSize = 
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0);
+            float regionCacheRatioOnOldServer = (regionSizeMB - coldDataSize) 
<= 0
+              ? 0.0f
+              : (float) regionSizeInCache / (regionSizeMB - coldDataSize);
             regionCacheRatioOnOldServerMap.put(regionEncodedName,
               new Pair<>(sn, regionCacheRatioOnOldServer));
           }
@@ -335,6 +347,7 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
   private class CacheAwareCandidateGenerator extends CandidateGenerator {
     @Override
     protected BalanceAction generate(BalancerClusterState cluster) {
+      simulatedRatio = BigDecimal.ZERO;
       // Move the regions to the servers they were previously hosted on based 
on the cache ratio
       if (
         !regionCacheRatioOnOldServerMap.isEmpty()
@@ -374,6 +387,50 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
         regionCacheRatioOnOldServerMap.remove(regionEncodedName);
         return action;
       }
+      return generatePlanForFreeCacheSpace(cluster);
+    }
+
+    private BalanceAction generatePlanForFreeCacheSpace(BalancerClusterState 
cluster) {
+      if (cluster.serverBlockCacheFreeSize == null) {
+        return BalanceAction.NULL_ACTION;
+      }
+      List<BalanceAction> possibleActions = new ArrayList<>();
+      Map<Integer, Long> serverFreeCacheAfterAction = new HashMap<>();
+      for (int region = 0; region < cluster.numRegions; region++) {
+        RegionInfo regionInfo = cluster.regions[region];
+        if (regionInfo.isMetaRegion() || 
regionInfo.getTable().isSystemTable()) {
+          continue;
+        }
+        int currentServer = cluster.regionIndexToServerIndex[region];
+        float ratio = cluster.getSumRegionCacheAndColdDataRatio(region);
+        if (ratio >= lowCacheRatioThreshold) {
+          continue;
+        }
+        int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
+        if (regionSizeMb <= 0) {
+          continue;
+        }
+        long bytesNeeded = (long) (regionSizeMb * 1024L * 1024L * 
minFreeCacheSpaceFactor);
+        for (int server = 0; server < cluster.numServers; server++) {
+          // Skips current server for region, as we can't generate a move to 
same server
+          if (server == currentServer) {
+            continue;
+          }
+          serverFreeCacheAfterAction.putIfAbsent(server, 
cluster.serverBlockCacheFreeSize[server]);
+          if (serverFreeCacheAfterAction.get(server) >= bytesNeeded) {
+            serverFreeCacheAfterAction.compute(server, (s, freeCache) -> 
freeCache - bytesNeeded);
+            possibleActions.add(getAction(currentServer, region, server, -1));
+          }
+        }
+      }
+      if (!possibleActions.isEmpty()) {
+        BalanceAction action =
+          
possibleActions.get(ThreadLocalRandom.current().nextInt(possibleActions.size()));
+        LOG.debug("region {} had sum ratio {}",
+          cluster.regions[((MoveRegionAction) 
action).getRegion()].getEncodedName(),
+          cluster.getSumRegionCacheAndColdDataRatio(((MoveRegionAction) 
action).getRegion()));
+        return action;
+      }
       return BalanceAction.NULL_ACTION;
     }
 
@@ -383,7 +440,7 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
       return moveRegionToOldServer(cluster, regionIndex, currentServerIndex,
         cacheRatioOnCurrentServer, oldServerIndex, cacheRatioOnOldServer)
           ? getAction(currentServerIndex, regionIndex, oldServerIndex, -1)
-          : BalanceAction.NULL_ACTION;
+          : generatePlanForFreeCacheSpace(cluster);
     }
 
     private boolean moveRegionToOldServer(BalancerClusterState cluster, int 
regionIndex,
@@ -454,6 +511,7 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
   private class CacheAwareSkewnessCandidateGenerator extends 
LoadCandidateGenerator {
     @Override
     BalanceAction pickRandomRegions(BalancerClusterState cluster, int 
thisServer, int otherServer) {
+      simulatedRatio = BigDecimal.ZERO;
       // First move all the regions which were hosted previously on some other 
server back to their
       // old servers
       if (
@@ -588,7 +646,7 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
     }
   }
 
-  static class CacheAwareCostFunction extends CostFunction {
+  class CacheAwareCostFunction extends CostFunction {
     private static final String CACHE_COST_KEY = 
"hbase.master.balancer.stochastic.cacheCost";
     private double cacheRatio;
     private double bestCacheRatio;
@@ -630,14 +688,13 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
         currentSum += currentWeighted[region];
         // here we only get the server index where this region cache ratio is 
the highest
         int serverIndexBestCache = 
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+        // get the highest cacheRatio for this region on the current state of 
allocations
         double currentHighestCache =
           cluster.getOrComputeWeightedRegionCacheRatio(region, 
serverIndexBestCache);
         // Get a hypothetical best cache ratio for this region if any server 
has enough free cache
         // to host it.
-        double potentialHighestCache =
-          potentialBestWeightedFromFreeCache(cluster, region, 
currentHighestCache);
-        double actualHighest = Math.max(currentHighestCache, 
potentialHighestCache);
-        bestCacheSum += actualHighest;
+        double potentialHighestCache = 
potentialBestWeightedFromFreeCache(cluster, region);
+        bestCacheSum += Math.max(currentHighestCache, potentialHighestCache);
       }
       bestCacheRatio = bestCacheSum;
       if (bestCacheSum <= 0.0) {
@@ -652,11 +709,24 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
       double[] contrib = new double[totalRegions];
       for (int r = 0; r < totalRegions; r++) {
         int s = cluster.regionIndexToServerIndex[r];
-        int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+        int sizeMb = cluster.getRegionSizeMinusColdDataMB(r);
         if (sizeMb <= 0) {
           contrib[r] = 0.0;
           continue;
         }
+        boolean movedInSimulation = cluster.initialRegionIndexToServerIndex[r] 
!= s;
+        if (
+          cluster.serverBlockCacheFreeSize != null && movedInSimulation
+            && cluster.getSumRegionCacheAndColdDataRatio(r) < 
lowCacheRatioThreshold
+        ) {
+          LOG.debug("Region {} is simulated moved to new server {}",
+            cluster.regions[r].getEncodedName(), 
cluster.servers[s].getHostname());
+          long bytesNeeded = (long) (sizeMb * 1024L * 1024L * 
minFreeCacheSpaceFactor);
+          if (cluster.serverBlockCacheFreeSize[s] >= bytesNeeded) {
+            contrib[r] = sizeMb * potentialCacheRatioAfterMove;
+            continue;
+          }
+        }
         contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
       }
       return contrib;
@@ -668,13 +738,12 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
      * #potentialCacheRatioAfterMove} * region MB) so placement is not 
considered optimal solely
      * from low ratios when capacity exists somewhere in the cluster.
      */
-    private double potentialBestWeightedFromFreeCache(BalancerClusterState 
cluster, int region,
-      double currentHighestCache) {
-      float observedRatio = cluster.getObservedRegionCacheRatio(region);
+    private double potentialBestWeightedFromFreeCache(BalancerClusterState 
cluster, int region) {
+      float observedRatio = cluster.getSumRegionCacheAndColdDataRatio(region);
       if (observedRatio >= lowCacheRatioThreshold) {
         return 0.0;
       }
-      int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region);
+      int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
       if (regionSizeMb <= 0) {
         return 0.0;
       }
@@ -682,7 +751,7 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
       long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor);
       for (int s = 0; s < cluster.numServers; s++) {
         if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) {
-          return Math.max(currentHighestCache, regionSizeMb * 
potentialCacheRatioAfterMove);
+          return regionSizeMb * potentialCacheRatioAfterMove;
         }
       }
       return 0.0;
@@ -697,18 +766,39 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
     protected void regionMoved(int region, int oldServer, int newServer) {
       double regionCacheRatioOnOldServer =
         cluster.getOrComputeWeightedRegionCacheRatio(region, oldServer);
-      double regionCacheRatioOnNewServer =
-        cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
-      double cacheRatioDiff = regionCacheRatioOnNewServer - 
regionCacheRatioOnOldServer;
-      double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff / 
bestCacheRatio;
-      cacheRatio += normalizedDelta;
-      if (LOG.isDebugEnabled() && (cacheRatio < 0.0 || cacheRatio > 1.0)) {
+      if (simulatedRatio.equals(BigDecimal.ZERO)) {
+        double potentialCachedSizeOnNewServer =
+          cluster.getRegionSizeMinusColdDataMB(region) * 
potentialCacheRatioAfterMove;
+        boolean simulateCacheBasedOnFreeSpace =
+          cluster.getOrComputeRegionCacheRatio(region, oldServer) < 
lowCacheRatioThreshold
+            && cluster.serverBlockCacheFreeSize[newServer] >= 
potentialCachedSizeOnNewServer;
+        double regionCacheRatioOnNewServer = simulateCacheBasedOnFreeSpace
+          ? potentialCachedSizeOnNewServer
+          : cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
+        double cacheRatioDiff = regionCacheRatioOnNewServer - 
regionCacheRatioOnOldServer;
+        double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff 
/ bestCacheRatio;
         LOG.debug(
-          
"CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:regionCacheRatioOnOldServer:{}:"
-            + 
"regionCacheRatioOnNewServer:{}:bestRegionCacheRatio:{}:cacheRatio:{}",
-          cluster.regions[region].getEncodedName(), 
cluster.servers[oldServer].getHostname(),
-          cluster.servers[newServer].getHostname(), 
regionCacheRatioOnOldServer,
-          regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+          "simulating moving region {} using simulateCacheBasedOnFreeSpace={} "
+            + "got a normalized delta of {} to be added to cacheRatio: {}",
+          cluster.regions[region].getEncodedName(), 
simulateCacheBasedOnFreeSpace, normalizedDelta,
+          cacheRatio);
+        simulatedRatio = BigDecimal.valueOf(normalizedDelta);
+        cacheRatio += normalizedDelta;
+        if (cacheRatio < 0.0 || cacheRatio > 1.0) {
+          LOG.info(
+            "Recomputing cacheRatio after calculating impact of region move: 
\n "
+              + "CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:"
+              + 
"regionCacheRatioOnOldServer:{}:regionCacheRatioOnNewServer:{}:"
+              + "bestRegionCacheRatio:{}:cacheRatio:{}",
+            cluster.regions[region].getEncodedName(), 
cluster.servers[oldServer].getHostname(),
+            cluster.servers[newServer].getHostname(), 
regionCacheRatioOnOldServer,
+            regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+          recomputeCacheRatio(cluster);
+        }
+      } else {
+        // This means we are in an undoAction call and need to reverse the 
cache delta applied in
+        // the region move simulation
+        cacheRatio -= simulatedRatio.doubleValue();
       }
     }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index e5a9dcd3be3..d07a06503b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -654,6 +654,15 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
 
       newCost = computeCost(cluster, currentCost);
 
+      if (LOG.isDebugEnabled() && action.getType() == 
BalanceAction.Type.MOVE_REGION) {
+        LOG.debug(
+          "action moving region {} from {} to {} with cost {}. currentCost={}, 
functionCost={}",
+          cluster.regions[((MoveRegionAction) 
action).getRegion()].getEncodedName(),
+          cluster.servers[((MoveRegionAction) 
action).getFromServer()].getServerName(),
+          cluster.servers[((MoveRegionAction) 
action).getToServer()].getServerName(), newCost,
+          currentCost, functionCost());
+      }
+
       // Should this be kept?
       if (newCost < currentCost) {
         currentCost = newCost;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancer.java
index 2c13f3676f0..4e0232417d4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancer.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.master.balancer;
 
+import static org.junit.Assert.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
@@ -188,6 +189,71 @@ public class TestCacheAwareLoadBalancer extends BalancerTestBase {
     assertEquals(5, targetServers.get(server1).size());
   }
 
+  /**
+   * Regions on the overloaded RS report low block-cache ratio; no RS reports 
prefetch/historical
+   * cache for those regions (so {@link 
CacheAwareLoadBalancer.CacheAwareCandidateGenerator} has no
+   * "old server" to prefer). Another RS has ample free block cache. The 
balancer should still emit
+   * plans that shed load from the hot RS onto the idle RS with spare cache 
capacity.
+   */
+  @Test
+  public void 
testLowCacheRatioNoHistoricalCacheRelocatesWhenTargetHasFreeBlockCache()
+    throws Exception {
+    Map<ServerName, List<RegionInfo>> clusterState = new HashMap<>();
+    ServerName server0 = servers.get(0);
+    ServerName server1 = servers.get(1);
+    ServerName server2 = servers.get(2);
+
+    List<RegionInfo> regionsOnServer0 = randomRegions(10);
+    List<RegionInfo> regionsOnServer1 = randomRegions(0);
+    List<RegionInfo> regionsOnServer2 = randomRegions(5);
+
+    clusterState.put(server0, regionsOnServer0);
+    clusterState.put(server1, regionsOnServer1);
+    clusterState.put(server2, regionsOnServer2);
+
+    // Below LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT (0.35);
+    ServerMetrics sm0 = mockServerMetricsWithRegionCacheInfo(server0, 
regionsOnServer0, 0.1f,
+      new ArrayList<>(), 0, 10);
+    when(sm0.getCacheFreeSize()).thenReturn(0L);
+    ServerMetrics sm1 = mockServerMetricsWithRegionCacheInfo(server1, 
regionsOnServer1, 0.0f,
+      new ArrayList<>(), 0, 10);
+    // Simulates 1GB free cache space on server1
+    when(sm1.getCacheFreeSize()).thenReturn(1024L * 1024 * 1024);
+    ServerMetrics sm2 = mockServerMetricsWithRegionCacheInfo(server2, 
regionsOnServer2, 1.0f,
+      new ArrayList<>(), 0, 10);
+    when(sm2.getCacheFreeSize()).thenReturn(0L);
+
+    Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
+    serverMetricsMap.put(server0, sm0);
+    serverMetricsMap.put(server1, sm1);
+    serverMetricsMap.put(server2, sm2);
+    ClusterMetrics clusterMetrics = mock(ClusterMetrics.class);
+    when(clusterMetrics.getLiveServerMetrics()).thenReturn(serverMetricsMap);
+    loadBalancer.updateClusterMetrics(clusterMetrics);
+
+    assertTrue(loadBalancer.regionCacheRatioOnOldServerMap.isEmpty());
+
+    Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable =
+      (Map) mockClusterServersWithTables(clusterState);
+    List<RegionPlan> plans = loadBalancer.balanceCluster(loadOfAllTable);
+    assertNotNull(plans);
+
+    Set<RegionInfo> regionsMovedFromServer0 = new HashSet<>();
+    Map<ServerName, List<RegionInfo>> targetServers = new HashMap<>();
+    for (RegionPlan plan : plans) {
+      if (plan.getSource().equals(server0)) {
+        regionsMovedFromServer0.add(plan.getRegionInfo());
+        if (!targetServers.containsKey(plan.getDestination())) {
+          targetServers.put(plan.getDestination(), new ArrayList<>());
+        }
+        targetServers.get(plan.getDestination()).add(plan.getRegionInfo());
+      }
+    }
+    assertEquals(5, regionsMovedFromServer0.size());
+    assertNotNull(targetServers.get(server1));
+    assertEquals(5, targetServers.get(server1).size());
+  }
+
   @Test
   public void 
testRegionsPartiallyCachedOnOldServerAndNotCachedOnCurrentServer() throws 
Exception {
     // The regions are partially cached on old server but not cached on the 
current server
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
index 946518dc9de..44d84f02f1b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
@@ -260,8 +260,9 @@ public class TestCacheAwareLoadBalancerCostFunctions extends BalancerTestBase {
   @Test
   public void testCacheCost() {
     conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, 
"/tmp/prefetch.persistence");
+    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
     CacheAwareLoadBalancer.CacheAwareCostFunction costFunction =
-      new CacheAwareLoadBalancer.CacheAwareCostFunction(conf);
+      lb.new CacheAwareCostFunction(conf);
 
     for (int test = 0; test < clusterRegionCacheRatioMocks.length; test++) {
       int[][] clusterRegionLocations = clusterRegionCacheRatioMocks[test];
@@ -387,11 +388,6 @@ public class TestCacheAwareLoadBalancerCostFunctions extends BalancerTestBase {
       regionCacheRatioOnOldServerMap = oldCacheRatio;
     }
 
-    @Override
-    public int getTotalRegionHFileSizeMB(int region) {
-      return 1;
-    }
-
     @Override
     protected float getRegionCacheRatioOnRegionServer(int region, int 
regionServerIndex) {
       float cacheRatio = 0.0f;
@@ -420,5 +416,11 @@ public class TestCacheAwareLoadBalancerCostFunctions extends BalancerTestBase {
       }
       return cacheRatio;
     }
+
+    @Override
+    int getRegionSizeMinusColdDataMB(int region) {
+      return 1;
+    }
+
   }
 }

Reply via email to