umamaheswararao commented on code in PR #3574:
URL: https://github.com/apache/ozone/pull/3574#discussion_r911849020


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java:
##########
@@ -77,10 +77,24 @@ public static class HealthyResult extends 
ContainerHealthResult {
   public static class UnderReplicatedHealthResult
       extends ContainerHealthResult {
 
+    // For under replicated containers, the best remaining redundancy we can
+    // have is 3 for EC-10-4, 2 for EC-6-3, 1 for EC-3-2 and 2 for Ratis.
+    // A container which is under-replicated due to decommission will have one
+    // more, ie 4, 3, 2, 3 respectively. Ideally we want to sort decommission

Review Comment:
   one more means weight right?



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java:
##########
@@ -77,10 +77,24 @@ public static class HealthyResult extends 
ContainerHealthResult {
   public static class UnderReplicatedHealthResult
       extends ContainerHealthResult {
 
+    // For under replicated containers, the best remaining redundancy we can
+    // have is 3 for EC-10-4, 2 for EC-6-3, 1 for EC-3-2 and 2 for Ratis.
+    // A container which is under-replicated due to decommission will have one
+    // more, ie 4, 3, 2, 3 respectively. Ideally we want to sort decommission
+    // only under-replication after all other under-replicated containers.
+    // It may also make sense to allow under-replicated containers a chance to
+    // retry once before processing the decommission only under replication.
+    // Therefore we should adjust the weighted remaining redundancy of
+    // decommission only under-replicated containers to a floor of 5 so they
+    // sort after an under-replicated container with 3 remaining replicas (
+    // EC-10-4) and plus one retry.
+    private static final int DECOMMISSION_REDUNDANCY = 5;

Review Comment:
   Basically if the idea is to keep the decom elements last priority than 
underreplication, will that cause decom to take very long time if there are lot 
of under replication items in that cluster?
   
   



##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java:
##########
@@ -245,12 +262,111 @@ public void testOverReplicatedFixByPending()
         ReplicationManagerReport.HealthState.OVER_REPLICATED));
   }
 
+  @Test
+  public void testUnderReplicatedOrderingInQueue()
+      throws ContainerNotFoundException {
+    ContainerInfo decomContainer = createContainerInfo(repConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(decomContainer, Pair.of(DECOMMISSIONING, 1),
+        Pair.of(DECOMMISSIONING, 2), Pair.of(DECOMMISSIONING, 3),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONING, 5));
+
+    ContainerInfo underRep1 = createContainerInfo(repConfig, 2,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep1, 1, 2, 3, 4);
+    ContainerInfo underRep0 = createContainerInfo(repConfig, 3,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep0, 1, 2, 3);
+
+    replicationManager.processContainer(decomContainer, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep1, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep0, underRep, overRep,
+        repReport);

Review Comment:
   Can we use process all?



##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java:
##########
@@ -245,12 +262,111 @@ public void testOverReplicatedFixByPending()
         ReplicationManagerReport.HealthState.OVER_REPLICATED));
   }
 
+  @Test
+  public void testUnderReplicatedOrderingInQueue()
+      throws ContainerNotFoundException {
+    ContainerInfo decomContainer = createContainerInfo(repConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(decomContainer, Pair.of(DECOMMISSIONING, 1),
+        Pair.of(DECOMMISSIONING, 2), Pair.of(DECOMMISSIONING, 3),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONING, 5));
+
+    ContainerInfo underRep1 = createContainerInfo(repConfig, 2,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep1, 1, 2, 3, 4);
+    ContainerInfo underRep0 = createContainerInfo(repConfig, 3,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep0, 1, 2, 3);
+
+    replicationManager.processContainer(decomContainer, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep1, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep0, underRep, overRep,
+        repReport);
+    // We expect 3 messages on the queue. They should be in the reverse order
+    // to which they were added, putting the lowest remaining redundancy first.
+    Assert.assertEquals(3, underRep.size());
+    ContainerHealthResult.UnderReplicatedHealthResult res = underRep.poll();
+    Assert.assertEquals(underRep0, res.getContainerInfo());
+    res = underRep.poll();
+    Assert.assertEquals(underRep1, res.getContainerInfo());
+    res = underRep.poll();
+    Assert.assertEquals(decomContainer, res.getContainerInfo());
+  }
+
+  @Test
+  public void testUnderReplicationQueuePopulated() {
+    ContainerInfo decomContainer = createContainerInfo(repConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(decomContainer, Pair.of(DECOMMISSIONING, 1),
+        Pair.of(DECOMMISSIONING, 2), Pair.of(DECOMMISSIONING, 3),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONING, 5));
+
+    ContainerInfo underRep1 = createContainerInfo(repConfig, 2,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep1, 1, 2, 3, 4);
+    ContainerInfo underRep0 = createContainerInfo(repConfig, 3,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep0, 1, 2, 3);
+
+    replicationManager.processAll();

Review Comment:
   Except assertions above test is doing same until here? How about combining?



##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java:
##########
@@ -245,12 +262,111 @@ public void testOverReplicatedFixByPending()
         ReplicationManagerReport.HealthState.OVER_REPLICATED));
   }
 
+  @Test
+  public void testUnderReplicatedOrderingInQueue()
+      throws ContainerNotFoundException {
+    ContainerInfo decomContainer = createContainerInfo(repConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(decomContainer, Pair.of(DECOMMISSIONING, 1),
+        Pair.of(DECOMMISSIONING, 2), Pair.of(DECOMMISSIONING, 3),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONING, 5));
+
+    ContainerInfo underRep1 = createContainerInfo(repConfig, 2,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep1, 1, 2, 3, 4);
+    ContainerInfo underRep0 = createContainerInfo(repConfig, 3,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep0, 1, 2, 3);
+
+    replicationManager.processContainer(decomContainer, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep1, underRep, overRep,
+        repReport);
+    replicationManager.processContainer(underRep0, underRep, overRep,
+        repReport);
+    // We expect 3 messages on the queue. They should be in the reverse order
+    // to which they were added, putting the lowest remaining redundancy first.
+    Assert.assertEquals(3, underRep.size());
+    ContainerHealthResult.UnderReplicatedHealthResult res = underRep.poll();
+    Assert.assertEquals(underRep0, res.getContainerInfo());
+    res = underRep.poll();
+    Assert.assertEquals(underRep1, res.getContainerInfo());
+    res = underRep.poll();
+    Assert.assertEquals(decomContainer, res.getContainerInfo());
+  }
+
+  @Test
+  public void testUnderReplicationQueuePopulated() {
+    ContainerInfo decomContainer = createContainerInfo(repConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(decomContainer, Pair.of(DECOMMISSIONING, 1),
+        Pair.of(DECOMMISSIONING, 2), Pair.of(DECOMMISSIONING, 3),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONING, 5));
+
+    ContainerInfo underRep1 = createContainerInfo(repConfig, 2,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep1, 1, 2, 3, 4);
+    ContainerInfo underRep0 = createContainerInfo(repConfig, 3,
+        HddsProtos.LifeCycleState.CLOSED);
+    addReplicas(underRep0, 1, 2, 3);
+
+    replicationManager.processAll();
+
+    // Get the first message off the queue - it should be underRep0.
+    ContainerHealthResult.UnderReplicatedHealthResult res
+        = replicationManager.dequeueUnderReplicatedContainer();
+    Assert.assertEquals(underRep0, res.getContainerInfo());
+
+    // Now requeue it
+    replicationManager.requeueUnderReplicatedContainer(res);
+
+    // Now get the next message. It should be underRep1, as it has remaining
+    // redundancy 1 + zero retries. UnderRep1 will have remaining redundancy 0

Review Comment:
   You mean UnderRep0 will have remaining reundancy 0?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to