sodonnel commented on code in PR #3405:
URL: https://github.com/apache/ozone/pull/3405#discussion_r871282016


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java:
##########
@@ -0,0 +1,309 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdds.scm.container;
+
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
+
+/**
+ * This class provides a set of methods to test for over / under replication of
+ * EC containers, taking into account decommission / maintenance nodes,
+ * pending replications, pending deletes and the existing replicas.
+ *
+ * The intention of this class is to wrap the logic used to detect over and
+ * under replication, to allow other areas to easily check the status of a
+ * container.
+ *
+ * For calculating under replication:
+ *
+ *   * Assume that decommission replicas are already lost, as they
+ *     will eventually go away.
+ *   * Any pending deletes are treated as if they have already been deleted.
+ *   * Pending adds are ignored as they may fail to create.
+ *
+ * Similar for over replication:
+ *
+ *   * Assume decommissioned replicas are already lost.
+ *   * Pending delete replicas will complete
+ *   * Pending adds are ignored as they may not complete.
+ *   * Maintenance copies are not considered until they are back to IN_SERVICE
+ */
+
+public class ECContainerReplicaCount {
+
+  private final ECReplicationConfig repConfig;
+  private final List<Integer> pendingAdd;
+  private final int remainingMaintenanceRedundancy;
+  private final Map<Integer, Integer> healthyIndexes = new HashMap<>();
+  private final Map<Integer, Integer> decommissionIndexes = new HashMap<>();
+  private final Map<Integer, Integer> maintenanceIndexes = new HashMap<>();
+
+  public ECContainerReplicaCount(ContainerInfo containerInfo,
+      Set<ContainerReplica> replicas, List<Integer> indexesPendingAdd,
+      List<Integer> indexesPendingDelete, int remainingMaintenanceRedundancy) {
+    this.repConfig = (ECReplicationConfig)containerInfo.getReplicationConfig();
+    this.pendingAdd = indexesPendingAdd;
+    this.remainingMaintenanceRedundancy
+        = Math.min(repConfig.getParity(), remainingMaintenanceRedundancy);
+
+    for (ContainerReplica replica : replicas) {
+      HddsProtos.NodeOperationalState state =
+          replica.getDatanodeDetails().getPersistedOpState();
+      int index = replica.getReplicaIndex();
+      ensureIndexWithinBounds(index);
+      if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
+        int val = decommissionIndexes.getOrDefault(index, 0);
+        decommissionIndexes.put(index, val + 1);
+      } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
+        int val = maintenanceIndexes.getOrDefault(index, 0);
+        maintenanceIndexes.put(index, val + 1);
+      } else {
+        int val = healthyIndexes.getOrDefault(index, 0);
+        healthyIndexes.put(index, val + 1);
+      }
+    }
+    // Remove the pending delete replicas from the healthy set, as we assume
+    // they will eventually be removed, and reduce the count for this replica.
+    // If the count goes to zero, remove it from the map.
+    for (Integer i : indexesPendingDelete) {
+      ensureIndexWithinBounds(i);
+      Integer count = healthyIndexes.get(i);
+      if (count != null) {
+        count = count - 1;
+        if (count < 1) {
+          healthyIndexes.remove(i);
+        } else {
+          healthyIndexes.put(i, count);
+        }
+      }
+    }
+    // Ensure any pending adds are within bounds
+    for (Integer i : pendingAdd) {
+      ensureIndexWithinBounds(i);
+    }
+  }
+
+  /**
+   * Get a set containing all decommissioning indexes, or an empty set if none
+   * are decommissioning. Note it is possible for an index to be
+   * decommissioning, healthy and in maintenance, if there are multiple copies
+   * of it.
+   */
+  public Set<Integer> decommissioningIndexes() {
+    return decommissionIndexes.keySet();
+  }
+
+  /**
+   * Get a set containing all maintenance indexes, or an empty set if none are
+   * in maintenance. Note it is possible for an index to be
+   * decommissioning, healthy and in maintenance, if there are multiple copies
+   * of it.
+   * @return Set of replica indexes with at least one copy on a node that is
+   *         entering or in maintenance.
+   */
+  public Set<Integer> maintenanceIndexes() {
+    return maintenanceIndexes.keySet();
+  }
+
+  /**
+   * Return true if there are insufficient replicas to recover this container,
+   * i.e. fewer distinct replica indexes are present than the EC data count.
+   * @return True if the container cannot be recovered, false otherwise.
+   */
+  public boolean isMissing() {
+    Set<Integer> distinct = new HashSet<>();
+    distinct.addAll(healthyIndexes.keySet());
+    distinct.addAll(decommissionIndexes.keySet());
+    distinct.addAll(maintenanceIndexes.keySet());
+    return distinct.size() < repConfig.getData();
+  }
+
+  /**
+   * Returns an unsorted list of indexes which need additional copies to
+   * ensure the container is sufficiently replicated. These missing indexes
+   * will not be on maintenance nodes, although they may be on decommissioning
+   * nodes. Replicas pending delete are assumed to be removed, and any pending
+   * adds are assumed to be created and are omitted from the returned list.
+   * This list can be used to determine which replicas must be recovered in a
+   * group, assuming the inflight replicas pending add complete successfully.
+   * @return List of missing indexes
+   */
+  public List<Integer> missingNonMaintenanceIndexes() {
+    if (isSufficientlyReplicated()) {
+      return Collections.emptyList();
+    }
+    Set<Integer> missing = new HashSet<>();
+    for (int i = 1; i <= repConfig.getRequiredNodes(); i++) {
+      if (!healthyIndexes.containsKey(i)) {
+        missing.add(i);
+      }
+    }
+    // Now we have a list of missing. Remove any pending add as they should
+    // eventually recover.
+    for (Integer i : pendingAdd) {
+      missing.remove(i);
+    }

Review Comment:
   No, the intention in this case is to get the list of indexes we must
   schedule new copies for, so we assume the pending adds will complete. From
   the Javadoc above:
   
   > and any pending adds are assumed to be created and are omitted from the
   > returned list. This list can be used to determine which replicas must be
   > recovered in a group, assuming the inflight replicas pending add complete
   > successfully.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to