This is an automated email from the ASF dual-hosted git repository.

sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-5713 by this push:
     new c00288af23d HDDS-13611. [DiskBalancer] Inconsistent VolumeDataDensity calculations between SCM and DN and incorrect EstBytesToMove (#8972)
c00288af23d is described below

commit c00288af23d39735b52d6782a627d3d9d4afed46
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Fri Oct 10 13:43:53 2025 +0530

    HDDS-13611. [DiskBalancer] Inconsistent VolumeDataDensity calculations between SCM and DN and incorrect EstBytesToMove (#8972)
---
 .../container/common/volume/MutableVolumeSet.java  |  13 ---
 .../container/diskbalancer/DiskBalancerInfo.java   |   6 +-
 .../diskbalancer/DiskBalancerService.java          |  73 +++++++-----
 .../DiskBalancerVolumeCalculation.java             | 122 +++++++++++++++++++++
 .../policy/DefaultVolumeChoosingPolicy.java        |  16 ++-
 .../diskbalancer/TestDiskBalancerService.java      |   5 +-
 .../proto/ScmServerDatanodeHeartbeatProtocol.proto |   1 +
 .../hadoop/hdds/scm/node/DiskBalancerManager.java  |  53 ++-------
 .../hadoop/hdds/scm/node/DiskBalancerStatus.java   |   9 +-
 .../hdds/scm/node/TestDiskBalancerManager.java     |   7 ++
 .../hadoop/ozone/scm/node/TestDiskBalancer.java    |  27 +++++
 11 files changed, 240 insertions(+), 92 deletions(-)
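
For context on the fix: VolumeDataDensity for a datanode is the sum, over its volumes, of |volumeUsage - idealUsage|, where idealUsage is the node-wide used/capacity ratio. Before this patch, SCM recomputed the metric from storage reports while the DN computed it locally, and the two could disagree. A minimal, self-contained sketch of the arithmetic (all capacities and usage figures below are hypothetical):

    // Hypothetical 3-volume datanode; density = sum(|usage_i - ideal|).
    long[] capacity = {100L, 100L, 100L};
    long[] used     = { 90L,  50L,  10L};
    long totalCapacity = 0, totalUsed = 0;
    for (int i = 0; i < capacity.length; i++) {
      totalCapacity += capacity[i];
      totalUsed += used[i];
    }
    double ideal = (double) totalUsed / totalCapacity;    // 150/300 = 0.5
    double density = 0.0;
    for (int i = 0; i < capacity.length; i++) {
      density += Math.abs((double) used[i] / capacity[i] - ideal);
    }
    // density = |0.9-0.5| + |0.5-0.5| + |0.1-0.5| = 0.8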

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
index 8ce114cfd40..9a691fe92e0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.ozone.container.common.volume;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import java.io.IOException;
@@ -34,7 +33,6 @@
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory;
-import org.apache.hadoop.hdds.fs.SpaceUsageSource;
 import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
@@ -507,15 +505,4 @@ public StorageVolume.VolumeType getVolumeType() {
   public VolumeHealthMetrics getVolumeHealthMetrics() {
     return volumeHealthMetrics;
   }
-
-  public double getIdealUsage() {
-    long totalCapacity = 0L, totalFree = 0L;
-    for (StorageVolume volume: volumeMap.values()) {
-      SpaceUsageSource usage = volume.getCurrentUsage();
-      totalCapacity += usage.getCapacity();
-      totalFree += usage.getAvailable();
-    }
-    Preconditions.checkArgument(totalCapacity != 0);
-    return ((double) (totalCapacity - totalFree)) / totalCapacity;
-  }
 }
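
The getIdealUsage() removed here reappears in the new DiskBalancerVolumeCalculation utility added later in this patch, operating on an immutable snapshot of the volume list instead of the live volume map. A sketch of the replacement call pattern, assuming a MutableVolumeSet named volumeSet is in scope:

    // Before: double ideal = volumeSet.getIdealUsage();
    // After: freeze the volume list first, then compute from the snapshot.
    ImmutableList<HddsVolume> snapshot =
        DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
    double ideal = DiskBalancerVolumeCalculation.getIdealUsage(snapshot);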
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
index 579d3db23f2..882c8c71016 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
@@ -37,6 +37,7 @@ public class DiskBalancerInfo {
   private long failureCount;
   private long bytesToMove;
   private long balancedBytes;
+  private double volumeDataDensity;
 
   public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double threshold,
       long bandwidthInMB, int parallelThread, boolean stopAfterDiskEven) {
@@ -57,7 +58,7 @@ public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double th
   @SuppressWarnings("checkstyle:ParameterNumber")
   public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double threshold,
       long bandwidthInMB, int parallelThread, boolean stopAfterDiskEven, DiskBalancerVersion version,
-      long successCount, long failureCount, long bytesToMove, long balancedBytes) {
+      long successCount, long failureCount, long bytesToMove, long balancedBytes, double volumeDataDensity) {
     this.operationalState = operationalState;
     this.threshold = threshold;
     this.bandwidthInMB = bandwidthInMB;
@@ -68,6 +69,7 @@ public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double th
     this.failureCount = failureCount;
     this.bytesToMove = bytesToMove;
     this.balancedBytes = balancedBytes;
+    this.volumeDataDensity = volumeDataDensity;
   }
 
   public DiskBalancerInfo(boolean shouldRun,
@@ -112,6 +114,8 @@ public StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto toDiskBala
     builder.setFailureMoveCount(failureCount);
     builder.setBytesToMove(bytesToMove);
     builder.setBalancedBytes(balancedBytes);
+    builder.setVolumeDataDensity(volumeDataDensity);
+    
     return builder.build();
   }
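
With the new constructor parameter and the proto field added below, the datanode now ships the density it computed locally, and SCM consumes it as-is instead of recomputing from storage reports. A hedged sketch of the DN-side construction (argument values are illustrative and mirror the constructor in the hunk above):

    // DN side: the locally computed density rides along in the report.
    DiskBalancerInfo info = new DiskBalancerInfo(
        operationalState, threshold, bandwidthInMB, parallelThread,
        stopAfterDiskEven, version, successCount, failureCount,
        bytesToMove, balancedBytes, /* volumeDataDensity */ 0.8);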
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
index 2eabf1ac236..aa9dd257678 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
@@ -21,6 +21,7 @@
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
@@ -38,6 +39,7 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.StorageUnit;
+import org.apache.hadoop.hdds.fs.SpaceUsageSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -121,7 +123,6 @@ public class DiskBalancerService extends BackgroundService {
   private final File diskBalancerInfoFile;
 
   private DiskBalancerServiceMetrics metrics;
-  private long bytesToMove;
   private long containerDefaultSize;
 
   /**
@@ -401,8 +402,7 @@ public BackgroundTaskQueue getTasks() {
       }
     }
 
-    if (queue.isEmpty()) {
-      bytesToMove = 0;
+    if (queue.isEmpty() && inProgressContainers.isEmpty()) {
       if (stopAfterDiskEven) {
         LOG.info("Disk balancer is stopped due to disk even as" +
             " the property StopAfterDiskEven is set to true.");
@@ -415,8 +415,6 @@ public BackgroundTaskQueue getTasks() {
         }
       }
       metrics.incrIdleLoopNoAvailableVolumePairCount();
-    } else {
-      bytesToMove = calculateBytesToMove(volumeSet);
     }
 
     return queue;
@@ -620,43 +618,58 @@ private void postCall(boolean success, long startTime) {
   }
 
   public DiskBalancerInfo getDiskBalancerInfo() {
-    return new DiskBalancerInfo(operationalState, threshold, bandwidthInMB,
-        parallelThread, stopAfterDiskEven, version, metrics.getSuccessCount(),
-        metrics.getFailureCount(), bytesToMove, metrics.getSuccessBytes());
-  }
+    ImmutableList<HddsVolume> immutableVolumeSet = DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
 
-  public long calculateBytesToMove(MutableVolumeSet inputVolumeSet) {
-    long bytesPendingToMove = 0;
-    long totalFreeSpace = 0;
-    long totalCapacity = 0;
+    // Calculate volumeDataDensity from the immutable snapshot
+    double volumeDataDensity =
+        DiskBalancerVolumeCalculation.calculateVolumeDataDensity(immutableVolumeSet, deltaSizes);
 
-    for (HddsVolume volume : StorageVolumeUtil.getHddsVolumesList(inputVolumeSet.getVolumesList())) {
-      totalFreeSpace += volume.getCurrentUsage().getAvailable();
-      totalCapacity += volume.getCurrentUsage().getCapacity();
+    long bytesToMove = 0;
+    if (this.operationalState == DiskBalancerOperationalState.RUNNING) {
+      // Recompute bytesToMove live while the balancer is RUNNING; otherwise it stays 0.
+      bytesToMove = calculateBytesToMove(immutableVolumeSet);
     }
 
-    if (totalCapacity == 0) {
+    return new DiskBalancerInfo(operationalState, threshold, bandwidthInMB,
+        parallelThread, stopAfterDiskEven, version, metrics.getSuccessCount(),
+        metrics.getFailureCount(), bytesToMove, metrics.getSuccessBytes(), volumeDataDensity);
+  }
+
+  public long calculateBytesToMove(ImmutableList<HddsVolume> inputVolumeSet) {
+    // With fewer than two volumes there is nothing to balance, so no bytes to move.
+    if (inputVolumeSet.size() < 2) {
       return 0;
     }
 
-    double datanodeUtilization = ((double) (totalCapacity - totalFreeSpace)) / totalCapacity;
+    // Calculate ideal usage
+    double idealUsage = DiskBalancerVolumeCalculation.getIdealUsage(inputVolumeSet);
+    double normalizedThreshold = threshold / 100.0;
 
-    double thresholdFraction = threshold / 100.0;
-    double upperLimit = datanodeUtilization + thresholdFraction;
+    long totalBytesToMove = 0;
 
     // Calculate excess data in overused volumes
-    for (HddsVolume volume : StorageVolumeUtil.getHddsVolumesList(inputVolumeSet.getVolumesList())) {
-      long freeSpace = volume.getCurrentUsage().getAvailable();
-      long capacity = volume.getCurrentUsage().getCapacity();
-      double volumeUtilization = ((double) (capacity - freeSpace)) / capacity;
-
-      // Consider only volumes exceeding the upper threshold
-      if (volumeUtilization > upperLimit) {
-        long excessData = (capacity - freeSpace) - (long) (upperLimit * capacity);
-        bytesPendingToMove += excessData;
+    for (HddsVolume volume : inputVolumeSet) {
+      SpaceUsageSource usage = volume.getCurrentUsage();
+
+      if (usage.getCapacity() == 0) {
+        continue;
+      }
+
+      long deltaSize = deltaSizes.getOrDefault(volume, 0L);
+      double currentUsage = (double)((usage.getCapacity() - usage.getAvailable())
+          + deltaSize + volume.getCommittedBytes()) / usage.getCapacity();
+
+      double volumeUtilization = currentUsage - idealUsage;
+
+      // Only consider volumes that exceed the threshold (source volumes)
+      if (volumeUtilization >= normalizedThreshold) {
+        // Calculate excess bytes that need to be moved from this volume
+        long excessBytes = (long) ((volumeUtilization - normalizedThreshold) * usage.getCapacity());
+        totalBytesToMove += Math.max(0, excessBytes);
       }
     }
-    return bytesPendingToMove;
+    return totalBytesToMove;
   }
 
   private Path getDiskBalancerTmpDir(HddsVolume hddsVolume) {
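
The rewritten calculateBytesToMove() measures each volume's usage (now including committed bytes and in-flight delta bytes) against the node's idealUsage, and counts as movable only the portion above idealUsage + threshold. A self-contained numeric sketch of that rule (all figures hypothetical):

    // One volume: capacity 100, used 90; node-wide ideal 0.5; threshold 10%.
    double idealUsage = 0.5, normalizedThreshold = 0.10;
    long capacity = 100L, used = 90L, deltaSize = 0L, committedBytes = 0L;
    double currentUsage =
        (double) (used + deltaSize + committedBytes) / capacity;  // 0.9
    double overIdeal = currentUsage - idealUsage;                 // 0.4
    long excessBytes = overIdeal >= normalizedThreshold
        ? (long) ((overIdeal - normalizedThreshold) * capacity)   // 30 bytes
        : 0L;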
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java
new file mode 100644
index 00000000000..6bfb168d1da
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.container.diskbalancer;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import java.util.Map;
+import org.apache.hadoop.hdds.fs.SpaceUsageSource;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for disk balancer volume calculations.
+ *
+ * This class provides shared functionality for creating immutable volume
+ * snapshots, calculating ideal usage, and computing volume data density,
+ * ensuring consistency across all disk balancing components and preventing
+ * race conditions.
+ */
+public final class DiskBalancerVolumeCalculation {
+  
+  private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerVolumeCalculation.class);
+  
+  private DiskBalancerVolumeCalculation() {
+    // Utility class - prevent instantiation
+  }
+  
+  /**
+   * Get an immutable snapshot of volumes from a MutableVolumeSet.
+   * 
+   * @param volumeSet The MutableVolumeSet to create a snapshot from
+   * @return Immutable list of HddsVolume objects
+   */
+  public static ImmutableList<HddsVolume> getImmutableVolumeSet(MutableVolumeSet volumeSet) {
+    // Create an immutable copy of the volume list at this point in time
+    List<HddsVolume> volumes = StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList());
+    return ImmutableList.copyOf(volumes);
+  }
+  
+  /**
+   * Get ideal usage from an immutable list of volumes.
+   * 
+   * @param volumes Immutable list of volumes
+   * @return Ideal usage as a ratio (used space / total capacity)
+   * @throws IllegalArgumentException if total capacity is zero
+   */
+  public static double getIdealUsage(ImmutableList<HddsVolume> volumes) {
+    long totalCapacity = 0L, totalFree = 0L;
+    
+    for (HddsVolume volume : volumes) {
+      SpaceUsageSource usage = volume.getCurrentUsage();
+      totalCapacity += usage.getCapacity();
+      totalFree += usage.getAvailable();
+    }
+    
+    Preconditions.checkArgument(totalCapacity != 0);
+    return ((double) (totalCapacity - totalFree)) / totalCapacity;
+  }
+  
+  /**
+   * Calculate VolumeDataDensity.
+   * 
+   * @param volumeSet Immutable snapshot of the volumes to evaluate
+   * @param deltaMap Map of volume to delta sizes (ongoing operations), can be null
+   * @return VolumeDataDensity sum across all volumes
+   */
+  public static double calculateVolumeDataDensity(ImmutableList<HddsVolume> volumeSet, Map<HddsVolume, Long> deltaMap) {
+    if (volumeSet == null) {
+      LOG.warn("VolumeSet is null, returning 0.0 for VolumeDataDensity");
+      return 0.0;
+    }
+    
+    try {
+      // If there is only one volume, return 0.0 as there's no imbalance to measure
+      if (volumeSet.size() <= 1) {
+        return 0.0;
+      }
+
+      // Calculate ideal usage using the same immutable volume snapshot
+      double idealUsage = getIdealUsage(volumeSet);
+      double volumeDensitySum = 0.0;
+
+      // Calculate density for each volume using the same snapshot
+      for (HddsVolume volume : volumeSet) {
+        SpaceUsageSource usage = volume.getCurrentUsage();
+        Preconditions.checkArgument(usage.getCapacity() != 0);
+
+        long deltaSize = (deltaMap != null) ? deltaMap.getOrDefault(volume, 0L) : 0L;
+        double currentUsage = (double)((usage.getCapacity() - usage.getAvailable())
+            + deltaSize + volume.getCommittedBytes()) / usage.getCapacity();
+        
+        // Calculate density as absolute difference from ideal usage
+        double volumeDensity = Math.abs(currentUsage - idealUsage);
+        volumeDensitySum += volumeDensity;
+      }
+      return volumeDensitySum;
+    } catch (Exception e) {
+      LOG.error("Error calculating VolumeDataDensity", e);
+      return -1.0;
+    }
+  }
+}
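
A brief usage sketch of the new utility, assuming a MutableVolumeSet and the service's delta map (deltaSizes) are in scope. Note the contract visible above: calculateVolumeDataDensity() returns 0.0 for a null or single-volume set, and -1.0 if the calculation fails:

    ImmutableList<HddsVolume> volumes =
        DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
    double density =
        DiskBalancerVolumeCalculation.calculateVolumeDataDensity(volumes, deltaSizes);
    if (density < 0) {
      // -1.0 signals a calculation error; treat the density as unavailable.
    }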
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
index 20cd2aef0c4..021488987a7 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
@@ -17,16 +17,17 @@
 
 package org.apache.hadoop.ozone.container.diskbalancer.policy;
 
+import com.google.common.collect.ImmutableList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.fs.SpaceUsageSource;
-import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.common.volume.AvailableSpaceFilter;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
+import org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -52,12 +53,19 @@ public Pair<HddsVolume, HddsVolume> chooseVolume(MutableVolumeSet volumeSet,
       double threshold, Map<HddsVolume, Long> deltaMap, long containerSize) {
     lock.lock();
     try {
-      double idealUsage = volumeSet.getIdealUsage();
+      // Create truly immutable snapshot of volumes to ensure consistency
+      ImmutableList<HddsVolume> allVolumes = DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
+
+      if (allVolumes.size() < 2) {
+        return null; // Can't balance with fewer than 2 volumes.
+      }
+      
+      // Calculate ideal usage from the same immutable volume snapshot
+      double idealUsage = DiskBalancerVolumeCalculation.getIdealUsage(allVolumes);
 
       // Threshold is given as a percentage
       double normalizedThreshold = threshold / 100;
-      List<HddsVolume> volumes = StorageVolumeUtil
-          .getHddsVolumesList(volumeSet.getVolumesList())
+      List<HddsVolume> volumes = allVolumes
           .stream()
           .filter(volume -> {
             SpaceUsageSource usage = volume.getCurrentUsage();
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
index e75b861e7c0..0d22a896062 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
@@ -27,6 +27,7 @@
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
+import com.google.common.collect.ImmutableList;
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Path;
@@ -251,8 +252,10 @@ public void testCalculateBytesToMove(int volumeCount, int deltaUsagePercent,
     long expectedBytesToMove = (long) Math.ceil(
         (totalCapacity * expectedBytesToMovePercent) / 100.0 * totalOverUtilisedVolumes);
 
+    ImmutableList<HddsVolume> immutableVolumes = DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
+
     // data precision loss due to double data involved in calculation
-    assertTrue(Math.abs(expectedBytesToMove - svc.calculateBytesToMove(volumeSet)) <= 1);
+    assertTrue(Math.abs(expectedBytesToMove - svc.calculateBytesToMove(immutableVolumes)) <= 1);
   }
 
   @Test
diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
index 166309d4db6..991f51c5bd9 100644
--- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
+++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
@@ -496,6 +496,7 @@ message DiskBalancerReportProto {
   optional uint64 successMoveCount = 4;
   optional uint64 failureMoveCount = 5;
   optional uint64 bytesToMove = 6;
+  optional double volumeDataDensity = 7;
 }
 
 /**
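
Because the new field is optional, SCM can distinguish "never reported" (for example, an older datanode) from a genuine 0.0. A minimal sketch, assuming the standard has-accessor that protobuf generates for optional fields:

    // SCM side: fall back to the NaN sentinel when a report omits the field.
    double density = reportProto.hasVolumeDataDensity()
        ? reportProto.getVolumeDataDensity()
        : Double.NaN;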
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
index 5e66d081a25..61006ebf3b2 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
@@ -22,7 +22,6 @@
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -34,7 +33,6 @@
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto;
-import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.ha.SCMContext;
@@ -84,10 +82,9 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerReport(
 
     for (DatanodeDetails datanodeDetails: nodeManager.getNodes(IN_SERVICE,
         HddsProtos.NodeState.HEALTHY)) {
-      double volumeDensitySum =
-          getVolumeDataDensitySumForDatanodeDetails(datanodeDetails);
+      DiskBalancerStatus status = getStatus(datanodeDetails);
       reportList.add(HddsProtos.DatanodeDiskBalancerInfoProto.newBuilder()
-          .setCurrentVolumeDensitySum(volumeDensitySum)
+          .setCurrentVolumeDensitySum(status.getVolumeDataDensity())
           .setNode(datanodeDetails.toProto(clientVersion))
           .build());
     }
@@ -267,14 +264,12 @@ private boolean shouldReturnDatanode(
 
   private HddsProtos.DatanodeDiskBalancerInfoProto getInfoProto(
       DatanodeInfo dn, int clientVersion) {
-    double volumeDensitySum =
-        getVolumeDataDensitySumForDatanodeDetails(dn);
     DiskBalancerStatus status = getStatus(dn);
 
     HddsProtos.DatanodeDiskBalancerInfoProto.Builder builder =
         HddsProtos.DatanodeDiskBalancerInfoProto.newBuilder()
             .setNode(dn.toProto(clientVersion))
-            .setCurrentVolumeDensitySum(volumeDensitySum)
+            .setCurrentVolumeDensitySum(status.getVolumeDataDensity())
             .setRunningStatus(status.getRunningStatus())
             .setSuccessMoveCount(status.getSuccessMoveCount())
             .setFailureMoveCount(status.getFailureMoveCount())
@@ -287,45 +282,16 @@ private HddsProtos.DatanodeDiskBalancerInfoProto getInfoProto(
     return builder.build();
   }
 
-  /**
-   * Get volume density for a specific DatanodeDetails node.
-   *
-   * @param datanodeDetails DatanodeDetails
-   * @return DiskBalancer report.
-   */
-  private double getVolumeDataDensitySumForDatanodeDetails(
-      DatanodeDetails datanodeDetails) {
-    Preconditions.checkArgument(datanodeDetails instanceof DatanodeInfo);
-
-    DatanodeInfo datanodeInfo = (DatanodeInfo) datanodeDetails;
-
-    double totalCapacity = 0d, totalFree = 0d;
-    for (StorageReportProto reportProto : datanodeInfo.getStorageReports()) {
-      totalCapacity += reportProto.getCapacity();
-      totalFree += reportProto.getRemaining();
-    }
-
-    Preconditions.checkArgument(totalCapacity != 0);
-    double idealUsage = (totalCapacity - totalFree) / totalCapacity;
-
-    double volumeDensitySum = datanodeInfo.getStorageReports().stream()
-        .map(report ->
-            Math.abs(((double) (report.getCapacity() - report.getRemaining())) / report.getCapacity()
-                - idealUsage))
-        .mapToDouble(Double::valueOf).sum();
-
-    return volumeDensitySum;
-  }
-
   public DiskBalancerStatus getStatus(DatanodeDetails datanodeDetails) {
     return statusMap.computeIfAbsent(datanodeDetails,
-        dn -> new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, new DiskBalancerConfiguration(), 0, 0, 0, 0));
+        dn -> new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, new DiskBalancerConfiguration(),
+            0, 0, 0, 0, Double.NaN));
   }
 
   @VisibleForTesting
   public void addRunningDatanode(DatanodeDetails datanodeDetails) {
     statusMap.put(datanodeDetails, new DiskBalancerStatus(DiskBalancerRunningStatus.RUNNING,
-        new DiskBalancerConfiguration(), 0, 0, 0, 0));
+        new DiskBalancerConfiguration(), 0, 0, 0, 0, 0.0));
   }
 
   public void processDiskBalancerReport(DiskBalancerReportProto reportProto,
@@ -340,9 +306,12 @@ public void processDiskBalancerReport(DiskBalancerReportProto reportProto,
     long failureMoveCount = reportProto.getFailureMoveCount();
     long bytesToMove = reportProto.getBytesToMove();
     long balancedBytes = reportProto.getBalancedBytes();
+    double volumeDataDensity = reportProto.getVolumeDataDensity();
+
     statusMap.put(dn, new DiskBalancerStatus(
         isRunning ? DiskBalancerRunningStatus.RUNNING : DiskBalancerRunningStatus.STOPPED,
-        diskBalancerConfiguration, successMoveCount, failureMoveCount, bytesToMove, balancedBytes));
+        diskBalancerConfiguration, successMoveCount, failureMoveCount, bytesToMove, balancedBytes,
+        volumeDataDensity));
     if (reportProto.hasBalancedBytes() && balancedBytesMap != null) {
       balancedBytesMap.put(dn, reportProto.getBalancedBytes());
     }
@@ -353,7 +322,7 @@ public void markStatusUnknown(DatanodeDetails dn) {
     if (currentStatus != null &&
         currentStatus.getRunningStatus() != DiskBalancerRunningStatus.UNKNOWN) {
       DiskBalancerStatus unknownStatus = new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN,
-          new DiskBalancerConfiguration(), 0, 0, 0, 0);
+          new DiskBalancerConfiguration(), 0, 0, 0, 0, Double.NaN);
       statusMap.put(dn, unknownStatus);
     }
   }
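
Since UNKNOWN statuses are now seeded with Double.NaN rather than 0, consumers of getVolumeDataDensity() should guard before formatting or comparing; a minimal sketch:

    DiskBalancerStatus status = diskBalancerManager.getStatus(datanodeDetails);
    double density = status.getVolumeDataDensity();
    String display = Double.isNaN(density) ? "N/A" : String.format("%.4f", density);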
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
index e8df44f0b56..fa182d7a22c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
@@ -35,15 +35,18 @@ public class DiskBalancerStatus {
   private long failureMoveCount;
   private long bytesToMove;
   private long balancedBytes;
+  private double volumeDataDensity;
 
   public DiskBalancerStatus(DiskBalancerRunningStatus isRunning, DiskBalancerConfiguration conf,
-      long successMoveCount, long failureMoveCount, long bytesToMove, long balancedBytes) {
+      long successMoveCount, long failureMoveCount, long bytesToMove, long balancedBytes,
+      double volumeDataDensity) {
     this.isRunning = isRunning;
     this.diskBalancerConfiguration = conf;
     this.successMoveCount = successMoveCount;
     this.failureMoveCount = failureMoveCount;
     this.bytesToMove = bytesToMove;
     this.balancedBytes = balancedBytes;
+    this.volumeDataDensity = volumeDataDensity;
   }
 
   public DiskBalancerRunningStatus getRunningStatus() {
@@ -69,4 +72,8 @@ public long getBytesToMove() {
   public long getBalancedBytes() {
     return balancedBytes;
   }
+
+  public double getVolumeDataDensity() {
+    return volumeDataDensity;
+  }
 }
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
index dbdf4974145..41d1a09ffaf 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
@@ -65,6 +65,12 @@ public void setup() throws Exception {
 
   @Test
   public void testDatanodeDiskBalancerReport() throws IOException {
+    // Populate disk balancer reports for all datanodes to avoid Double.NaN comparison issues.
+    for (DatanodeDetails dn : nodeManager.getAllNodes()) {
+      diskBalancerReportHandler.onMessage(
+          new DiskBalancerReportFromDatanode(dn, generateRandomReport()), null);
+    }
+
     List<HddsProtos.DatanodeDiskBalancerInfoProto> reportProtoList =
         diskBalancerManager.getDiskBalancerReport(2,
             ClientVersion.CURRENT_VERSION);
@@ -117,6 +123,7 @@ private DiskBalancerReportProto generateRandomReport() {
     return DiskBalancerReportProto.newBuilder()
         .setIsRunning(random.nextBoolean())
         .setBalancedBytes(random.nextInt(10000))
+        .setVolumeDataDensity(random.nextDouble())
         .setDiskBalancerConf(
             HddsProtos.DiskBalancerConfigurationProto.newBuilder()
                 .setThreshold(random.nextInt(99))
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
index d2620140d72..aebc7318885 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto;
 import org.apache.hadoop.hdds.scm.HddsTestUtils;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -42,7 +43,9 @@
 import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity;
 import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
 import org.apache.hadoop.hdds.scm.node.DiskBalancerManager;
+import org.apache.hadoop.hdds.scm.node.DiskBalancerReportHandler;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
 import org.apache.hadoop.ozone.ClientVersion;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
@@ -63,6 +66,7 @@ public class TestDiskBalancer {
   private static ScmClient storageClient;
   private static MiniOzoneCluster cluster;
   private static DiskBalancerManager diskBalancerManager;
+  private static DiskBalancerReportHandler diskBalancerReportHandler;
 
   @BeforeAll
   public static void setup() throws Exception {
@@ -76,6 +80,8 @@ public static void setup() throws Exception {
     cluster.waitForClusterToBeReady();
     diskBalancerManager = cluster.getStorageContainerManager()
         .getDiskBalancerManager();
+    diskBalancerReportHandler =
+        new DiskBalancerReportHandler(diskBalancerManager);
 
     for (DatanodeDetails dn: cluster.getStorageContainerManager()
         .getScmNodeManager().getAllNodes()) {
@@ -93,6 +99,13 @@ public static void cleanup() throws Exception {
 
   @Test
   public void testDatanodeDiskBalancerReport() throws IOException {
+    // Populate disk balancer reports for all datanodes to avoid Double.NaN comparison issues.
+    for (DatanodeDetails dn : cluster.getStorageContainerManager()
+        .getScmNodeManager().getAllNodes()) {
+      diskBalancerReportHandler.onMessage(
+          new SCMDatanodeHeartbeatDispatcher.DiskBalancerReportFromDatanode(dn, generateRandomReport()), null);
+    }
+
     List<HddsProtos.DatanodeDiskBalancerInfoProto> reportProtoList =
         storageClient.getDiskBalancerReport(2);
 
@@ -186,4 +199,18 @@ public void testDatanodeDiskBalancerStatus() throws IOException, InterruptedExce
         ClientVersion.CURRENT_VERSION);
     assertEquals(1, statusProtoList.size());
   }
+
+  private DiskBalancerReportProto generateRandomReport() {
+    return DiskBalancerReportProto.newBuilder()
+        .setIsRunning(true)
+        .setBalancedBytes(1000)
+        .setVolumeDataDensity(Math.random() * 10)
+        .setDiskBalancerConf(
+            HddsProtos.DiskBalancerConfigurationProto.newBuilder()
+                .setThreshold(10)
+                .setParallelThread(2)
+                .setDiskBandwidthInMB(50)
+                .build())
+        .build();
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
