sodonnel commented on code in PR #8014: URL: https://github.com/apache/ozone/pull/8014#discussion_r1989114123
########## hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckReplicaCount.java: ########## @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED; +import static org.apache.ratis.util.Preconditions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; +import 
org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Tests for the QuasiClosedStuckReplicaCount class. + */ +public class TestQuasiClosedStuckReplicaCount { + + private UUID origin1; + private UUID origin2; + private UUID origin3; + + @BeforeEach + public void setUp() { + origin1 = UUID.randomUUID(); + origin2 = UUID.randomUUID(); + origin3 = UUID.randomUUID(); + } + + @Test + public void testCorrectlyReplicationWithThreeOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testCorrectReplicationWithTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testCorrectReplicationWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, 
IN_SERVICE), Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + assertTrue(replicaCount.getUnderReplicatedReplicas().isEmpty()); + } + + @Test + public void testUnderReplicationWithThreeOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 1, origin3); + } + + @Test + public void testUnderReplicationWithThreeOriginsTwoUnderReplicated() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + List<QuasiClosedStuckReplicaCount.MisReplicatedOrigin> misReplicatedOrigins = + replicaCount.getUnderReplicatedReplicas(); + assertTrue(misReplicatedOrigins.size() == 2); + + for (QuasiClosedStuckReplicaCount.MisReplicatedOrigin misReplicatedOrigin : misReplicatedOrigins) { + UUID source = misReplicatedOrigin.getSources().iterator().next().getOriginDatanodeId(); + assertTrue(source.equals(origin1) || source.equals(origin3)); + } + } + + @Test + public void testUnderReplicationWithTwoOrigins() { + 
Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 1, origin2); + } + + @Test + public void testUnderReplicationWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 1, 2, origin1); + } + + @Test + public void testOverReplicationWithThreeOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 3, 1, origin3); + } + + @Test + public void testOverReplicationWithTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin2, 
IN_SERVICE), Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 3, 1, origin2); + } + + @Test + public void testOverReplicationWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin1, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertTrue(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getOverReplicatedOrigins(), 1, 4, 1, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithThreeOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, DECOMMISSIONING), Pair.of(origin1, DECOMMISSIONING), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 2, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONING), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, 
IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 1, origin1); + } + + @Test + public void testUnderReplicationDueToDecommissionWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONING), Pair.of(origin1, DECOMMISSIONING)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 3, 2, origin1); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithThreeOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONED), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE), + Pair.of(origin3, IN_SERVICE), Pair.of(origin3, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, DECOMMISSIONED), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new 
QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithOutOfServiceReplicasWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin1, DECOMMISSIONED)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testUnderReplicationWithMaintenanceWithOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE), Pair.of(origin1, ENTERING_MAINTENANCE)); + + replicaCount = new QuasiClosedStuckReplicaCount(replicas, 2); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 3, 1, origin1); + } + + @Test + public void testUnderReplicationWithMaintenanceWithTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, ENTERING_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), 
Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + + replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, ENTERING_MAINTENANCE), Pair.of(origin1, ENTERING_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertTrue(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + validateMisReplicatedOrigins(replicaCount.getUnderReplicatedReplicas(), 1, 2, 1, origin1); + } + + @Test + public void testNoOverReplicationWithExcessMaintenanceReplicasTwoOrigins() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_MAINTENANCE), + Pair.of(origin2, IN_SERVICE), Pair.of(origin2, IN_SERVICE)); + + QuasiClosedStuckReplicaCount replicaCount = new QuasiClosedStuckReplicaCount(replicas, 1); + assertFalse(replicaCount.isUnderReplicated()); + assertFalse(replicaCount.isOverReplicated()); + } + + @Test + public void testNoOverReplicationWithExcessMaintenanceReplicasOneOrigin() { + Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState( + ContainerID.valueOf(1), QUASI_CLOSED, + Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), Pair.of(origin1, IN_SERVICE), + Pair.of(origin1, IN_MAINTENANCE)); Review Comment: I made a deliberate decision to keep the over-replication handling simpler. Maintenance replicas are considered for under-replication, as that allows nodes to be taken offline, but if they somehow become over-replicated, like here, we just ignore it until the maintenance replicas are either gone or back in service.
So the normal flow for one of the origins would be: IN_SERVICE, ENTERING_MAINTENANCE, which becomes: IN_SERVICE, IN_MAINTENANCE. Then, if another replica somehow appears (perhaps a node that was offline comes back), it will be: IN_SERVICE, IN_SERVICE, IN_MAINTENANCE. Later we will end up with: IN_SERVICE, IN_SERVICE, IN_SERVICE, and deal with the over-replication at that point. So basically, for over-replication, only consider IN_SERVICE replicas and ignore maintenance replicas. For under-replication, we do have to consider maintenance replicas, so that the container is replicated or not depending on the situation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
