GlenGeng commented on a change in pull request #2230:
URL: https://github.com/apache/ozone/pull/2230#discussion_r630705685



##########
File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
##########
@@ -59,74 +80,321 @@ public ContainerBalancer(
     this.ozoneConfiguration = ozoneConfiguration;
     this.balancerRunning = false;
     this.config = new ContainerBalancerConfiguration();
+    this.metrics = new ContainerBalancerMetrics();
   }
 
   /**
-   * Start ContainerBalancer. Current implementation is incomplete.
+   * Starts ContainerBalancer. Current implementation is incomplete.
    *
    * @param balancerConfiguration Configuration values.
    */
   public void start(ContainerBalancerConfiguration balancerConfiguration) {
+    if (balancerRunning) {
+      LOG.info("Container Balancer is already running.");
+      throw new RuntimeException();
+    }
     this.balancerRunning = true;
-
     ozoneConfiguration = new OzoneConfiguration();
 
-    // initialise configs
     this.config = balancerConfiguration;
     this.threshold = config.getThreshold();
-    this.maxDatanodesToBalance =
-        config.getMaxDatanodesToBalance();
+    this.maxDatanodesToBalance = config.getMaxDatanodesToBalance();
     this.maxSizeToMove = config.getMaxSizeToMove();
 
+    this.clusterCapacity = 0L;
+    this.clusterUsed = 0L;
+    this.clusterRemaining = 0L;
+
+    this.overUtilizedNodes = new ArrayList<>();
+    this.underUtilizedNodes = new ArrayList<>();
+    this.aboveAverageUtilizedNodes = new ArrayList<>();
+    this.belowAverageUtilizedNodes = new ArrayList<>();
+    this.sourceNodes = new ArrayList<>();
+
     LOG.info("Starting Container Balancer...");
+    LOG.info(toString());
+
+    balance();
+  }
 
-    // sorted list in order from most to least used
-    List<DatanodeUsageInfo> nodes = nodeManager.
-        getMostOrLeastUsedDatanodes(true);
-    double avgUtilisation = calculateAvgUtilisation(nodes);
+  /**
+   * Balances the cluster.
+   */
+  private void balance() {
+    overUtilizedNodes.clear();
+    underUtilizedNodes.clear();
+    aboveAverageUtilizedNodes.clear();
+    belowAverageUtilizedNodes.clear();
+    initializeIteration();
+  }
+
+  /**
+   * Initializes an iteration during balancing. Recognizes over, under,
+   * below-average,and under-average utilizes nodes. Decides whether
+   * balancing needs to continue or should be stopped.
+   *
+   * @return true if successfully initialized, otherwise false.
+   */
+  private boolean initializeIteration() {
+    List<DatanodeUsageInfo> nodes;
+    try {
+      // sorted list in order from most to least used
+      nodes = nodeManager.getMostOrLeastUsedDatanodes(true);
+    } catch (NullPointerException e) {
+      LOG.error("Container Balancer could not retrieve nodes from Node " +
+          "Manager.", e);
+      stop();
+      return false;
+    }
+
+    try {
+      clusterAvgUtilisation = calculateAvgUtilization(nodes);
+    } catch(ArithmeticException e) {
+      LOG.warn("Container Balancer failed to initialize an iteration", e);
+      return false;
+    }
+    LOG.info("Average utilization of the cluster is {}", 
clusterAvgUtilisation);
 
     // under utilized nodes have utilization(that is, used / capacity) less
     // than lower limit
-    double lowerLimit = avgUtilisation - threshold;
+    double lowerLimit = clusterAvgUtilisation - threshold;
 
     // over utilized nodes have utilization(that is, used / capacity) greater
     // than upper limit
-    double upperLimit = avgUtilisation + threshold;
+    double upperLimit = clusterAvgUtilisation + threshold;
+

Review comment:
       Question:
   
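   Say we have a 10-DN cluster in which every DN is 95% used, and then one empty 
DN is added so that the cluster can be rebalanced. Given a threshold of 10%, it 
seems the balancer will not do anything in this case: assuming all DNs have equal 
capacity, the average utilization is about 86%, so upperLimit is about 96%, and 
none of the 10 original DNs exceeds upperLimit to be treated as over-utilized.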
   
   Have we considered corner cases like this?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to