This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new f3872a78b5 HDDS-9683. Containers present on decommission nodes are reported as mis-replicated (#5726)
f3872a78b5 is described below
commit f3872a78b5cd308f58fe9b5a879eb5a0696573aa
Author: Christos Bisias <[email protected]>
AuthorDate: Tue Jan 2 21:58:55 2024 +0200
HDDS-9683. Containers present on decommission nodes are reported as mis-replicated (#5726)
---
.../hadoop/hdds/scm/SCMCommonPlacementPolicy.java | 10 ++++
.../TestSCMContainerPlacementRackAware.java | 47 +++++++++++++++
.../TestSCMContainerPlacementRackScatter.java | 68 ++++++++++++++++++++++
3 files changed, 125 insertions(+)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index 46cb142bb1..471a947941 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -465,6 +465,16 @@ public abstract class SCMCommonPlacementPolicy implements
}
int maxReplicasPerRack = getMaxReplicasPerRack(replicas,
Math.min(requiredRacks, numRacks));
+
+ // There are scenarios where there could be excessive replicas on a rack due to nodes
+ // in decommission or maintenance or over-replication in general.
+ // In these cases, ReplicationManager shouldn't report mis-replication.
+ // The original intention of mis-replication was to indicate that there isn't enough rack tolerance.
+ // In case of over-replication, this isn't an issue.
+ // Mis-replication should be an issue once over-replication is fixed, not before.
+ // Adjust the maximum number of replicas per rack to allow containers
+ // with excessive replicas to not be reported as mis-replicated.
+ maxReplicasPerRack += Math.max(0, dns.size() - replicas);
return new ContainerPlacementStatusDefault(
currentRackCount.size(), requiredRacks, numRacks, maxReplicasPerRack,
currentRackCount);
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
index 0cdcc6ca79..c3e5455098 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
@@ -52,6 +53,7 @@ import org.mockito.Mockito;
import org.apache.commons.lang3.StringUtils;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN;
import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA;
@@ -61,6 +63,7 @@ import static
org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
@@ -566,6 +569,50 @@ public class TestSCMContainerPlacementRackAware {
assertEquals(0, stat.misReplicationCount());
}
+ @ParameterizedTest
+ @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates")
+ public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) {
+ setup(7);
+ // 7 datanodes, all nodes are used.
+ // /rack0/node0 -> IN_SERVICE
+ // /rack0/node1 -> IN_SERVICE
+ // /rack0/node2 -> OFFLINE
+ // /rack0/node3 -> OFFLINE
+ // /rack0/node4 -> OFFLINE
+ // /rack1/node5 -> IN_SERVICE
+ // /rack1/node6 -> OFFLINE
+ datanodes.get(2).setPersistedOpState(state);
+ datanodes.get(3).setPersistedOpState(state);
+ datanodes.get(4).setPersistedOpState(state);
+ datanodes.get(6).setPersistedOpState(state);
+ List<DatanodeDetails> dns = new ArrayList<>(datanodes);
+
+ ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3);
+ assertTrue(status.isPolicySatisfied());
+ assertEquals(2, status.actualPlacementCount());
+ assertEquals(2, status.expectedPlacementCount());
+ assertEquals(0, status.misReplicationCount());
+ assertNull(status.misReplicatedReason());
+
+ // /rack0/node0 -> IN_SERVICE
+ // /rack0/node1 -> IN_SERVICE
+ // /rack0/node2 -> OFFLINE > IN_SERVICE
+ // /rack0/node3 -> OFFLINE
+ // /rack0/node4 -> OFFLINE
+ // /rack1/node5 -> IN_SERVICE
+ // /rack1/node6 -> OFFLINE > IN_SERVICE
+ datanodes.get(2).setPersistedOpState(IN_SERVICE);
+ datanodes.get(6).setPersistedOpState(IN_SERVICE);
+ dns = new ArrayList<>(datanodes);
+
+ status = policy.validateContainerPlacement(dns, 3);
+ assertTrue(status.isPolicySatisfied());
+ assertEquals(2, status.actualPlacementCount());
+ assertEquals(2, status.expectedPlacementCount());
+ assertEquals(0, status.misReplicationCount());
+ assertNull(status.misReplicatedReason());
+ }
+
@ParameterizedTest
@MethodSource("numDatanodes")
public void testOutOfServiceNodesNotSelected(int datanodeCount) {
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
index 4b16a90dab..0f801c0bbf 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
@@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
@@ -69,6 +70,7 @@ import static org.hamcrest.Matchers.matchesPattern;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
@@ -570,6 +572,72 @@ public class TestSCMContainerPlacementRackScatter {
assertEquals(0, stat.misReplicationCount());
}
+ @ParameterizedTest
+ @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates")
+ public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) {
+ setup(9, 3);
+ // 9 datanodes, 3 per rack.
+ // /rack0/node0 -> IN_SERVICE - used
+ // /rack0/node1
+ // /rack0/node2
+ // /rack1/node3 -> IN_SERVICE - used
+ // /rack1/node4
+ // /rack1/node5
+ // /rack2/node6 -> IN_SERVICE - used
+ // /rack2/node7
+ // /rack2/node8
+ List<DatanodeDetails> dns = new ArrayList<>();
+ dns.add(datanodes.get(0));
+ dns.add(datanodes.get(3));
+ dns.add(datanodes.get(6));
+
+ ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3);
+ assertTrue(status.isPolicySatisfied());
+ assertEquals(3, status.actualPlacementCount());
+ assertEquals(3, status.expectedPlacementCount());
+ assertEquals(0, status.misReplicationCount());
+ assertNull(status.misReplicatedReason());
+
+ // /rack0/node0 -> IN_SERVICE - used
+ // /rack0/node1 -> OFFLINE - used
+ // /rack0/node2
+ // /rack1/node3 -> IN_SERVICE - used
+ // /rack1/node4 -> OFFLINE - used
+ // /rack1/node5
+ // /rack2/node6 -> IN_SERVICE - used
+ // /rack2/node7
+ // /rack2/node8
+ datanodes.get(1).setPersistedOpState(state);
+ datanodes.get(4).setPersistedOpState(state);
+ dns.add(datanodes.get(1));
+ dns.add(datanodes.get(4));
+
+ status = policy.validateContainerPlacement(dns, 3);
+ assertTrue(status.isPolicySatisfied());
+ assertEquals(3, status.actualPlacementCount());
+ assertEquals(3, status.expectedPlacementCount());
+ assertEquals(0, status.misReplicationCount());
+ assertNull(status.misReplicatedReason());
+
+ // /rack0/node0 -> IN_SERVICE - used
+ // /rack0/node1 -> OFFLINE - used
+ // /rack0/node2 -> IN_SERVICE - used
+ // /rack1/node3 -> IN_SERVICE - used
+ // /rack1/node4 -> OFFLINE - used
+ // /rack1/node5
+ // /rack2/node6 -> IN_SERVICE - used
+ // /rack2/node7
+ // /rack2/node8
+ dns.add(datanodes.get(2));
+
+ status = policy.validateContainerPlacement(dns, 3);
+ assertTrue(status.isPolicySatisfied());
+ assertEquals(3, status.actualPlacementCount());
+ assertEquals(3, status.expectedPlacementCount());
+ assertEquals(0, status.misReplicationCount());
+ assertNull(status.misReplicatedReason());
+ }
+
public List<DatanodeDetails> getDatanodes(List<Integer> dnIndexes) {
return dnIndexes.stream().map(datanodes::get).collect(Collectors.toList());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]