This is an automated email from the ASF dual-hosted git repository.

peterxcli pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-5713 by this push:
     new 8eb944a65a HDDS-13212. [DiskBalancer] Fix Inconsistent Health Check in DiskBalancer Status for Specific Hosts (#8610)
8eb944a65a is described below

commit 8eb944a65a8d31a641d3a2a1a0d2deda4e277506
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Sat Jun 14 15:01:45 2025 +0530

    HDDS-13212. [DiskBalancer] Fix Inconsistent Health Check in DiskBalancer Status for Specific Hosts (#8610)
---
 .../hadoop/hdds/scm/node/DiskBalancerManager.java  | 20 ++++++--
 .../cli/datanode/DiskBalancerStatusSubcommand.java |  2 +-
 .../hadoop/ozone/scm/node/TestDiskBalancer.java    | 57 +++++++++++++++++++++-
 3 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
index 6a74099413..6b377da6b7 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.ha.SCMContext;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.storage.DiskBalancerConfiguration;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
@@ -107,7 +108,21 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerStatus(
     List<DatanodeDetails> filterDns = null;
     if (hosts.isPresent() && !hosts.get().isEmpty()) {
       filterDns = NodeUtils.mapHostnamesToDatanodes(nodeManager, hosts.get(),
-          useHostnames);
+          useHostnames).stream()
+          .filter(dn -> {
+            try {
+              NodeStatus nodeStatus = nodeManager.getNodeStatus(dn);
+              if (nodeStatus != NodeStatus.inServiceHealthy()) {
+                LOG.warn("Datanode {} is not in optimal state for disk 
balancing." +
+                    " NodeStatus: {}", dn.getHostName(), nodeStatus);
+                return false;
+              }
+              return true;
+            } catch (NodeNotFoundException e) {
+              throw new RuntimeException(e);
+            }
+          })
+          .collect(Collectors.toList());
     }
 
     // Filter Running Status by default
@@ -120,8 +135,7 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerStatus(
           .map(dn -> getInfoProto(dn, clientVersion))
           .collect(Collectors.toList());
     } else {
-      return nodeManager.getNodes(IN_SERVICE,
-              HddsProtos.NodeState.HEALTHY).stream()
+      return nodeManager.getNodes(NodeStatus.inServiceHealthy()).stream()
           .filter(dn -> shouldReturnDatanode(filterStatus, dn))
           .map(dn -> getInfoProto((DatanodeInfo)dn, clientVersion))
           .collect(Collectors.toList());
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
index 5ae2b37965..d8d0113eb1 100644
--- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
+++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
@@ -34,7 +34,7 @@
  */
 @Command(
     name = "status",
-    description = "Get Datanode DiskBalancer Status",
+    description = "Get Datanode DiskBalancer Status for inServiceHealthy DNs",
     mixinStandardHelpOptions = true,
     versionProvider = HddsVersionProvider.class)
 public class DiskBalancerStatusSubcommand extends ScmSubcommand {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
index 13d361ad2f..f2c60d9899 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
@@ -17,11 +17,16 @@
 
 package org.apache.hadoop.ozone.scm.node;
 
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+import static org.apache.hadoop.hdds.scm.node.TestNodeUtil.getDNHostAndPort;
+import static 
org.apache.hadoop.hdds.scm.node.TestNodeUtil.waitForDnToReachOpState;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
@@ -37,6 +42,8 @@
 import 
org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity;
 import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
 import org.apache.hadoop.hdds.scm.node.DiskBalancerManager;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.ozone.ClientVersion;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerService;
@@ -130,7 +137,53 @@ public void testDiskBalancerStopAfterEven() throws IOException,
   }
 
   @Test
-  public void testDatanodeDiskBalancerStatus() throws IOException {
-    // TODO: Test status command with datanodes in balancing
+  public void testDatanodeDiskBalancerStatus() throws IOException, 
InterruptedException, TimeoutException {
+    List<HddsDatanodeService> dns = cluster.getHddsDatanodes();
+    DatanodeDetails toDecommission = dns.get(0).getDatanodeDetails();
+
+    diskBalancerManager.startDiskBalancer(
+        Optional.of(10.0), // threshold
+        Optional.of(10L),  // bandwidth in MB
+        Optional.of(5),    // parallel threads
+        Optional.of(true), // stopAfterDiskEven
+        Optional.empty());
+
+    //all DNs IN_SERVICE, so disk balancer status for all should be present
+    List<HddsProtos.DatanodeDiskBalancerInfoProto> statusProtoList =
+        diskBalancerManager.getDiskBalancerStatus(Optional.empty(),
+            Optional.empty(),
+            ClientVersion.CURRENT_VERSION);
+    assertEquals(3, statusProtoList.size());
+
+    NodeManager nm = cluster.getStorageContainerManager().getScmNodeManager();
+
+    // Decommission the first DN
+    storageClient.decommissionNodes(Arrays.asList(
+        getDNHostAndPort(toDecommission)), false);
+    waitForDnToReachOpState(nm, toDecommission, DECOMMISSIONING);
+
+    //one DN is in DECOMMISSIONING state, so disk balancer status for it 
should not be present
+    statusProtoList = 
diskBalancerManager.getDiskBalancerStatus(Optional.empty(),
+        Optional.empty(),
+        ClientVersion.CURRENT_VERSION);
+    assertEquals(2, statusProtoList.size());
+
+    // Check status for the decommissioned DN should not be present
+    statusProtoList = diskBalancerManager.getDiskBalancerStatus(
+        
Optional.of(Collections.singletonList(getDNHostAndPort(toDecommission))),
+        Optional.empty(),
+        ClientVersion.CURRENT_VERSION);
+    assertEquals(0, statusProtoList.size());
+
+    storageClient.recommissionNodes(Arrays.asList(
+        getDNHostAndPort(toDecommission)));
+    waitForDnToReachOpState(nm, toDecommission, IN_SERVICE);
+
+    // Check status for the recommissioned DN should now be present
+    statusProtoList = diskBalancerManager.getDiskBalancerStatus(
+        
Optional.of(Collections.singletonList(getDNHostAndPort(toDecommission))),
+        Optional.empty(),
+        ClientVersion.CURRENT_VERSION);
+    assertEquals(1, statusProtoList.size());
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to