This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new f3872a78b5 HDDS-9683. Containers present on decommission nodes are 
reported as mis-replicated (#5726)
f3872a78b5 is described below

commit f3872a78b5cd308f58fe9b5a879eb5a0696573aa
Author: Christos Bisias <[email protected]>
AuthorDate: Tue Jan 2 21:58:55 2024 +0200

    HDDS-9683. Containers present on decommission nodes are reported as 
mis-replicated (#5726)
---
 .../hadoop/hdds/scm/SCMCommonPlacementPolicy.java  | 10 ++++
 .../TestSCMContainerPlacementRackAware.java        | 47 +++++++++++++++
 .../TestSCMContainerPlacementRackScatter.java      | 68 ++++++++++++++++++++++
 3 files changed, 125 insertions(+)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index 46cb142bb1..471a947941 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -465,6 +465,16 @@ public abstract class SCMCommonPlacementPolicy implements
     }
     int maxReplicasPerRack = getMaxReplicasPerRack(replicas,
             Math.min(requiredRacks, numRacks));
+
+    // There are scenarios where there could be excessive replicas on a rack 
due to nodes
+    // in decommission or maintenance or over-replication in general.
+    // In these cases, ReplicationManager shouldn't report mis-replication.
+    // The original intention of mis-replication was to indicate that there 
isn't enough rack tolerance.
+    // In case of over-replication, this isn't an issue.
+    // Mis-replication should be an issue once over-replication is fixed, not 
before.
+    // Adjust the maximum number of replicas per rack to allow containers
+    // with excessive replicas to not be reported as mis-replicated.
+    maxReplicasPerRack += Math.max(0, dns.size() - replicas);
     return new ContainerPlacementStatusDefault(
         currentRackCount.size(), requiredRacks, numRacks, maxReplicasPerRack,
             currentRackCount);
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
index 0cdcc6ca79..c3e5455098 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
 import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
@@ -52,6 +53,7 @@ import org.mockito.Mockito;
 import org.apache.commons.lang3.StringUtils;
 
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
 import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN;
 import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA;
@@ -61,6 +63,7 @@ import static 
org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
@@ -566,6 +569,50 @@ public class TestSCMContainerPlacementRackAware {
     assertEquals(0, stat.misReplicationCount());
   }
 
+  @ParameterizedTest
+  
@MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates")
+  public void 
testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) {
+    setup(7);
+    //    7 datanodes, all nodes are used.
+    //    /rack0/node0  -> IN_SERVICE
+    //    /rack0/node1  -> IN_SERVICE
+    //    /rack0/node2  -> OFFLINE
+    //    /rack0/node3  -> OFFLINE
+    //    /rack0/node4  -> OFFLINE
+    //    /rack1/node5  -> IN_SERVICE
+    //    /rack1/node6  -> OFFLINE
+    datanodes.get(2).setPersistedOpState(state);
+    datanodes.get(3).setPersistedOpState(state);
+    datanodes.get(4).setPersistedOpState(state);
+    datanodes.get(6).setPersistedOpState(state);
+    List<DatanodeDetails> dns = new ArrayList<>(datanodes);
+
+    ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 
3);
+    assertTrue(status.isPolicySatisfied());
+    assertEquals(2, status.actualPlacementCount());
+    assertEquals(2, status.expectedPlacementCount());
+    assertEquals(0, status.misReplicationCount());
+    assertNull(status.misReplicatedReason());
+
+    //    /rack0/node0  -> IN_SERVICE
+    //    /rack0/node1  -> IN_SERVICE
+    //    /rack0/node2  -> OFFLINE > IN_SERVICE
+    //    /rack0/node3  -> OFFLINE
+    //    /rack0/node4  -> OFFLINE
+    //    /rack1/node5  -> IN_SERVICE
+    //    /rack1/node6  -> OFFLINE > IN_SERVICE
+    datanodes.get(2).setPersistedOpState(IN_SERVICE);
+    datanodes.get(6).setPersistedOpState(IN_SERVICE);
+    dns = new ArrayList<>(datanodes);
+
+    status = policy.validateContainerPlacement(dns, 3);
+    assertTrue(status.isPolicySatisfied());
+    assertEquals(2, status.actualPlacementCount());
+    assertEquals(2, status.expectedPlacementCount());
+    assertEquals(0, status.misReplicationCount());
+    assertNull(status.misReplicatedReason());
+  }
+
   @ParameterizedTest
   @MethodSource("numDatanodes")
   public void testOutOfServiceNodesNotSelected(int datanodeCount) {
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
index 4b16a90dab..0f801c0bbf 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
@@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
 import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
@@ -69,6 +70,7 @@ import static org.hamcrest.Matchers.matchesPattern;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
@@ -570,6 +572,72 @@ public class TestSCMContainerPlacementRackScatter {
     assertEquals(0, stat.misReplicationCount());
   }
 
+  @ParameterizedTest
+  
@MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates")
+  public void 
testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) {
+    setup(9, 3);
+    //    9 datanodes, 3 per rack.
+    //    /rack0/node0  -> IN_SERVICE - used
+    //    /rack0/node1
+    //    /rack0/node2
+    //    /rack1/node3  -> IN_SERVICE - used
+    //    /rack1/node4
+    //    /rack1/node5
+    //    /rack2/node6  -> IN_SERVICE - used
+    //    /rack2/node7
+    //    /rack2/node8
+    List<DatanodeDetails> dns = new ArrayList<>();
+    dns.add(datanodes.get(0));
+    dns.add(datanodes.get(3));
+    dns.add(datanodes.get(6));
+
+    ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 
3);
+    assertTrue(status.isPolicySatisfied());
+    assertEquals(3, status.actualPlacementCount());
+    assertEquals(3, status.expectedPlacementCount());
+    assertEquals(0, status.misReplicationCount());
+    assertNull(status.misReplicatedReason());
+
+    //    /rack0/node0  -> IN_SERVICE - used
+    //    /rack0/node1  -> OFFLINE    - used
+    //    /rack0/node2
+    //    /rack1/node3  -> IN_SERVICE - used
+    //    /rack1/node4  -> OFFLINE    - used
+    //    /rack1/node5
+    //    /rack2/node6  -> IN_SERVICE - used
+    //    /rack2/node7
+    //    /rack2/node8
+    datanodes.get(1).setPersistedOpState(state);
+    datanodes.get(4).setPersistedOpState(state);
+    dns.add(datanodes.get(1));
+    dns.add(datanodes.get(4));
+
+    status = policy.validateContainerPlacement(dns, 3);
+    assertTrue(status.isPolicySatisfied());
+    assertEquals(3, status.actualPlacementCount());
+    assertEquals(3, status.expectedPlacementCount());
+    assertEquals(0, status.misReplicationCount());
+    assertNull(status.misReplicatedReason());
+
+    //    /rack0/node0  -> IN_SERVICE - used
+    //    /rack0/node1  -> OFFLINE    - used
+    //    /rack0/node2  -> IN_SERVICE - used
+    //    /rack1/node3  -> IN_SERVICE - used
+    //    /rack1/node4  -> OFFLINE    - used
+    //    /rack1/node5
+    //    /rack2/node6  -> IN_SERVICE - used
+    //    /rack2/node7
+    //    /rack2/node8
+    dns.add(datanodes.get(2));
+
+    status = policy.validateContainerPlacement(dns, 3);
+    assertTrue(status.isPolicySatisfied());
+    assertEquals(3, status.actualPlacementCount());
+    assertEquals(3, status.expectedPlacementCount());
+    assertEquals(0, status.misReplicationCount());
+    assertNull(status.misReplicatedReason());
+  }
+
   public List<DatanodeDetails> getDatanodes(List<Integer> dnIndexes) {
     return dnIndexes.stream().map(datanodes::get).collect(Collectors.toList());
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to