This is an automated email from the ASF dual-hosted git repository.

ljain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new fab8a12  HDDS-5728. ContainerBalancer should use remaining space to 
calculate utilization. (#2625)
fab8a12 is described below

commit fab8a120067b11c258e9af01840f3bdc907d96c4
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Thu Sep 16 17:06:23 2021 +0530

    HDDS-5728. ContainerBalancer should use remaining space to calculate 
utilization. (#2625)
---
 .../scm/container/balancer/ContainerBalancer.java  | 34 ++++++---------
 .../container/placement/metrics/SCMNodeStat.java   | 23 ----------
 .../hadoop/hdds/scm/node/DatanodeUsageInfo.java    | 49 +++++++++++++++-------
 .../hadoop/hdds/scm/node/SCMNodeManager.java       | 47 ++++++++++-----------
 .../hadoop/hdds/scm/container/MockNodeManager.java |  4 +-
 .../container/balancer/TestContainerBalancer.java  | 13 ++++++
 6 files changed, 84 insertions(+), 86 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
index 5ea43dd..c959674 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
@@ -83,6 +83,7 @@ public class ContainerBalancer {
   private ContainerBalancerMetrics metrics;
   private long clusterCapacity;
   private long clusterUsed;
+  private long clusterRemaining;
   private double clusterAvgUtilisation;
   private volatile boolean balancerRunning;
   private Thread currentBalancingThread;
@@ -241,6 +242,7 @@ public class ContainerBalancer {
     this.totalNodesInCluster = datanodeUsageInfos.size();
     this.clusterCapacity = 0L;
     this.clusterUsed = 0L;
+    this.clusterRemaining = 0L;
     this.selectedContainers.clear();
     this.overUtilizedNodes.clear();
     this.underUtilizedNodes.clear();
@@ -273,7 +275,7 @@ public class ContainerBalancer {
 
     // find over and under utilized nodes
     for (DatanodeUsageInfo datanodeUsageInfo : datanodeUsageInfos) {
-      double utilization = calculateUtilization(datanodeUsageInfo);
+      double utilization = datanodeUsageInfo.calculateUtilization();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Utilization for node {} is {}",
             datanodeUsageInfo.getDatanodeDetails().getUuidString(),
@@ -433,6 +435,8 @@ public class ContainerBalancer {
                 moveSelection.getContainerID(), e);
           }
           metrics.incrementMovedContainersNum(1);
+          // TODO incrementing size balanced this way incorrectly counts the
+          //  size moved twice
           metrics.incrementDataSizeBalancedGB(sizeMovedPerIteration);
         }
       } catch (InterruptedException e) {
@@ -588,6 +592,8 @@ public class ContainerBalancer {
       Collection<DatanodeDetails> potentialTargets,
       Set<DatanodeDetails> selectedTargets,
       ContainerMoveSelection moveSelection, DatanodeDetails source) {
+    // TODO: counting datanodes involved this way is incorrect when the same
+    //  datanode is involved in different moves
     countDatanodesInvolvedPerIteration += 2;
     incSizeSelectedForMoving(source, moveSelection);
     sourceToTargetMap.put(source, moveSelection);
@@ -596,7 +602,7 @@ public class ContainerBalancer {
     selectionCriteria.setSelectedContainers(selectedContainers);
 
     return potentialTargets.stream()
-        .filter(node -> sizeEnteringNode.get(node) <=
+        .filter(node -> sizeEnteringNode.get(node) <
             config.getMaxSizeEnteringTarget()).collect(Collectors.toList());
   }
 
@@ -613,12 +619,12 @@ public class ContainerBalancer {
 
   /**
    * Calculates the average utilization for the specified nodes.
-   * Utilization is used space divided by capacity.
+   * Utilization is (capacity - remaining) divided by capacity.
    *
    * @param nodes List of DatanodeUsageInfo to find the average utilization for
    * @return Average utilization value
    */
-  private double calculateAvgUtilization(List<DatanodeUsageInfo> nodes) {
+  double calculateAvgUtilization(List<DatanodeUsageInfo> nodes) {
     if (nodes.size() == 0) {
       LOG.warn("No nodes to calculate average utilization for in " +
           "ContainerBalancer.");
@@ -631,23 +637,9 @@ public class ContainerBalancer {
     }
     clusterCapacity = aggregatedStats.getCapacity().get();
     clusterUsed = aggregatedStats.getScmUsed().get();
+    clusterRemaining = aggregatedStats.getRemaining().get();
 
-    return clusterUsed / (double) clusterCapacity;
-  }
-
-  /**
-   * Calculates the utilization, that is used space divided by capacity, for
-   * the given datanodeUsageInfo.
-   *
-   * @param datanodeUsageInfo DatanodeUsageInfo to calculate utilization for
-   * @return Utilization value
-   */
-  public static double calculateUtilization(
-      DatanodeUsageInfo datanodeUsageInfo) {
-    SCMNodeStat stat = datanodeUsageInfo.getScmNodeStat();
-
-    return stat.getScmUsed().get().doubleValue() /
-        stat.getCapacity().get().doubleValue();
+    return (clusterCapacity - clusterRemaining) / (double) clusterCapacity;
   }
 
   /**
@@ -765,7 +757,7 @@ public class ContainerBalancer {
    *
    * @return List of DatanodeUsageInfo containing unBalanced nodes.
    */
-  public List<DatanodeUsageInfo> getUnBalancedNodes() {
+  List<DatanodeUsageInfo> getUnBalancedNodes() {
     return unBalancedNodes;
   }
 
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMNodeStat.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMNodeStat.java
index 270df05..962bbb4 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMNodeStat.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMNodeStat.java
@@ -138,27 +138,4 @@ public class SCMNodeStat implements NodeStat {
   public int hashCode() {
     return Long.hashCode(capacity.get() ^ scmUsed.get() ^ remaining.get());
   }
-
-  /**
-   * Compares this SCMNodeStat with other on the basis of remaining to
-   * capacity ratio.
-   *
-   * @param other The SCMNodeStat object to compare with this object.
-   * @return A value greater than 0 if this has lesser remaining ratio than the
-   * specified other, a value lesser than 0 if this has greater remaining ratio
-   * than the specified other, and 0 if remaining ratios are equal.
-   */
-  public int compareByRemainingRatio(SCMNodeStat other) {
-    Preconditions.checkNotNull(other, "Argument cannot be null");
-
-    // if capacity is zero, replace with 1 for division to work
-    double thisCapacity = Math.max(this.getCapacity().get().doubleValue(), 1d);
-    double otherCapacity = Math.max(
-        other.getCapacity().get().doubleValue(), 1d);
-
-    double thisRemainingRatio = this.getRemaining().get() / thisCapacity;
-    double otherRemainingRatio = other.getRemaining().get() / otherCapacity;
-
-    return Double.compare(otherRemainingRatio, thisRemainingRatio);
-  }
 }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java
index 2f8ee04..0100e4a 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java
@@ -46,23 +46,39 @@ public class DatanodeUsageInfo {
   }
 
   /**
-   * Compares two DatanodeUsageInfo on the basis of remaining space to capacity
-   * ratio.
+   * Compares two DatanodeUsageInfo on the basis of their utilization values,
+   * calculated using {@link DatanodeUsageInfo#calculateUtilization()}.
    *
    * @param first DatanodeUsageInfo
    * @param second DatanodeUsageInfo
-   * @return a value greater than 0 if second has higher remaining to
-   * capacity ratio, a value lesser than 0 if first has higher remaining to
-   * capacity ratio, and 0 if both have equal ratios or first.equals(second)
-   * is true
+   * @return a value greater than 0 first is more utilized, lesser than 0 if
+   * first is less utilized, and 0 if both have equal utilization values or
+   * first.equals(second) is true.
    */
-  private static int compareByRemainingRatio(DatanodeUsageInfo first,
+  private static int compareByUtilization(DatanodeUsageInfo first,
                              DatanodeUsageInfo second) {
     if (first.equals(second)) {
       return 0;
     }
-    return first.getScmNodeStat()
-        .compareByRemainingRatio(second.getScmNodeStat());
+    return Double.compare(first.calculateUtilization(),
+        second.calculateUtilization());
+  }
+
+  /**
+   * Calculates utilization of a datanode. Utilization of a datanode is defined
+   * as its used space divided by its capacity. Here, we prefer calculating
+   * used space as (capacity - remaining), instead of using
+   * {@link SCMNodeStat#getScmUsed()} (see HDDS-5728).
+   *
+   * @return (capacity - remaining) / capacity of this datanode
+   */
+  public double calculateUtilization() {
+    double capacity = scmNodeStat.getCapacity().get();
+    if (capacity == 0) {
+      return 0;
+    }
+    double numerator = capacity - scmNodeStat.getRemaining().get();
+    return numerator / capacity;
   }
 
   /**
@@ -105,16 +121,17 @@ public class DatanodeUsageInfo {
 
   /**
    * Gets Comparator that compares two DatanodeUsageInfo on the basis of
-   * remaining space to capacity ratio.
+   * their utilization values. Utilization is (capacity - remaining) divided
+   * by capacity.
    *
    * @return Comparator to compare two DatanodeUsageInfo. The comparison
-   * function returns a value greater than 0 if second DatanodeUsageInfo has
-   * greater remaining space to capacity ratio, a value lesser than 0 if
-   * first DatanodeUsageInfo has greater remaining space to capacity ratio,
-   * and 0 if both have equal ratios or first.equals(second) is true
+   * function returns a value greater than 0 if first DatanodeUsageInfo has
+   * greater utilization, a value lesser than 0 if first DatanodeUsageInfo
+   * has lesser utilization, and 0 if both have equal utilization values or
+   * first.equals(second) is true
    */
-  public static Comparator<DatanodeUsageInfo> getMostUsedByRemainingRatio() {
-    return DatanodeUsageInfo::compareByRemainingRatio;
+  public static Comparator<DatanodeUsageInfo> getMostUtilized() {
+    return DatanodeUsageInfo::compareByUtilization;
   }
 
   /**
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index c3db8d7..1d9ccc2 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -17,26 +17,6 @@
  */
 package org.apache.hadoop.hdds.scm.node;
 
-import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
-import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY_READONLY;
-
-import javax.management.ObjectName;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.Collections;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ScheduledFuture;
-import java.util.stream.Collectors;
-
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
@@ -47,8 +27,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
+import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode;
@@ -89,6 +68,26 @@ import 
org.apache.ratis.protocol.exceptions.NotLeaderException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.management.ObjectName;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ScheduledFuture;
+import java.util.stream.Collectors;
+
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY_READONLY;
+
 /**
  * Maintains information about the Datanodes on SCM side.
  * <p>
@@ -695,10 +694,10 @@ public class SCMNodeManager implements NodeManager {
     // sort the list according to appropriate comparator
     if (mostUsed) {
       datanodeUsageInfoList.sort(
-          DatanodeUsageInfo.getMostUsedByRemainingRatio().reversed());
+          DatanodeUsageInfo.getMostUtilized().reversed());
     } else {
       datanodeUsageInfoList.sort(
-          DatanodeUsageInfo.getMostUsedByRemainingRatio());
+          DatanodeUsageInfo.getMostUtilized());
     }
     return datanodeUsageInfoList;
   }
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
index b2289c4..7b02bfe 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
@@ -384,9 +384,9 @@ public class MockNodeManager implements NodeManager {
     }
     Comparator<DatanodeUsageInfo> comparator;
     if (mostUsed) {
-      comparator = DatanodeUsageInfo.getMostUsedByRemainingRatio().reversed();
+      comparator = DatanodeUsageInfo.getMostUtilized().reversed();
     } else {
-      comparator = DatanodeUsageInfo.getMostUsedByRemainingRatio();
+      comparator = DatanodeUsageInfo.getMostUtilized();
     }
 
     return datanodeDetailsList.stream()
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
index d420dc1..71f29b5 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
@@ -140,6 +140,19 @@ public class TestContainerBalancer {
         replicationManager, conf, SCMContext.emptyContext(), placementPolicy);
   }
 
+  @Test
+  public void testCalculationOfUtilization() {
+    Assert.assertEquals(nodesInCluster.size(), nodeUtilizations.size());
+    for (int i = 0; i < nodesInCluster.size(); i++) {
+      Assert.assertEquals(nodeUtilizations.get(i),
+          nodesInCluster.get(i).calculateUtilization(), 0.0001);
+    }
+
+    // should be equal to average utilization of the cluster
+    Assert.assertEquals(averageUtilization,
+        containerBalancer.calculateAvgUtilization(nodesInCluster), 0.0001);
+  }
+
   /**
    * Checks whether ContainerBalancer is correctly updating the list of
    * unBalanced nodes with varying values of Threshold.

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to