sodonnel commented on code in PR #4614:
URL: https://github.com/apache/ozone/pull/4614#discussion_r1175183604


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java:
##########
@@ -99,7 +99,177 @@ public SCMContainerPlacementRackAware(final NodeManager 
nodeManager,
    */
   @Override
   protected List<DatanodeDetails> chooseDatanodesInternal(
-          List<DatanodeDetails> usedNodes,
+      List<DatanodeDetails> usedNodes,
+      List<DatanodeDetails> excludedNodes,
+      List<DatanodeDetails> favoredNodes, int nodesRequired,
+      long metadataSizeRequired, long dataSizeRequired)
+      throws SCMException {
+    if (!usedNodesPassed(usedNodes)) {
+      // If interface is called without used nodes
+      // In this case consider only exclude nodes to determine racks
+      return chooseDatanodesInternalLegacy(excludedNodes,
+          favoredNodes, nodesRequired, metadataSizeRequired, dataSizeRequired);
+    }
+    Preconditions.checkArgument(nodesRequired > 0);
+    metrics.incrDatanodeRequestCount(nodesRequired);
+    int datanodeCount = networkTopology.getNumOfLeafNode(NetConstants.ROOT);
+    int excludedNodesCount = excludedNodes == null ? 0 : excludedNodes.size();
+    int usedNodesCount = usedNodes == null ? 0 : usedNodes.size();
+    if (datanodeCount < nodesRequired + excludedNodesCount + usedNodesCount) {
+      throw new SCMException("No enough datanodes to choose. " +
+          "TotalNode = " + datanodeCount +
+          " RequiredNode = " + nodesRequired +
+          " ExcludedNode = " + excludedNodesCount +
+          " UsedNode = " + usedNodesCount, null);
+    }
+    List<DatanodeDetails> mutableFavoredNodes = favoredNodes;
+    // sanity check of favoredNodes
+    if (mutableFavoredNodes != null && excludedNodes != null) {
+      mutableFavoredNodes = new ArrayList<>();
+      mutableFavoredNodes.addAll(favoredNodes);
+      mutableFavoredNodes.removeAll(excludedNodes);
+    }
+    int favoredNodeNum = mutableFavoredNodes == null ? 0 :
+        mutableFavoredNodes.size();
+
+    List<DatanodeDetails> chosenNodes = new ArrayList<>();
+    List<DatanodeDetails> mutableUsedNodes = new ArrayList<>();
+    mutableUsedNodes.addAll(usedNodes);
+    List<DatanodeDetails> mutableExcludedNodes = new ArrayList<>();
+    if (excludedNodes != null) {
+      mutableExcludedNodes.addAll(excludedNodes);
+    }
+
+    DatanodeDetails favoredNode;
+    int favorIndex = 0;
+    if (mutableUsedNodes.size() == 0) {
+      // choose all nodes for a new pipeline case
+      // choose first datanode from scope ROOT or from favoredNodes if not null
+      favoredNode = favoredNodeNum > favorIndex ?
+          mutableFavoredNodes.get(favorIndex) : null;
+      DatanodeDetails firstNode;
+      if (favoredNode != null) {
+        firstNode = favoredNode;
+        favorIndex++;
+      } else {
+        firstNode = chooseNode(mutableExcludedNodes, null, 
metadataSizeRequired,
+            dataSizeRequired);
+      }
+      chosenNodes.add(firstNode);
+      nodesRequired--;
+      if (nodesRequired == 0) {
+        return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+      }
+
+      // choose second datanode on the same rack as first one
+      favoredNode = favoredNodeNum > favorIndex ?
+          mutableFavoredNodes.get(favorIndex) : null;
+      DatanodeDetails secondNode;
+      if (favoredNode != null &&
+          networkTopology.isSameParent(firstNode, favoredNode)) {
+        secondNode = favoredNode;
+        favorIndex++;
+      } else {
+        mutableExcludedNodes.add(firstNode);
+        secondNode = chooseNode(mutableExcludedNodes, Arrays.asList(firstNode),
+            metadataSizeRequired, dataSizeRequired);
+      }
+      chosenNodes.add(secondNode);
+      nodesRequired--;
+      if (nodesRequired == 0) {
+        return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+      }
+      // choose remaining datanodes on different rack as first and second
+      mutableExcludedNodes.add(secondNode);
+    } else {
+
+      // choose node to meet replication requirement
+      // case 1: one used node, choose one on the same rack as the used
+      // node, choose others on different racks.
+      if (mutableUsedNodes.size() == 1) {
+        favoredNode = favoredNodeNum > favorIndex ?
+            mutableFavoredNodes.get(favorIndex) : null;
+        DatanodeDetails firstNode;
+        if (favoredNode != null &&
+            networkTopology.isSameParent(mutableUsedNodes.get(0),
+            favoredNode)) {
+          firstNode = favoredNode;
+          favorIndex++;
+        } else {
+          firstNode = chooseNode(mutableExcludedNodes, mutableUsedNodes,
+              metadataSizeRequired, dataSizeRequired);
+        }
+        chosenNodes.add(firstNode);
+        nodesRequired--;
+        if (nodesRequired == 0) {
+          return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+        }
+
+        // choose remaining nodes on different racks
+        mutableExcludedNodes.add(firstNode);
+        mutableExcludedNodes.addAll(mutableUsedNodes);
+      } else {
+        // case 2: two or more used nodes, if these two nodes are
+        // in the same rack, then choose nodes on different racks, otherwise,
+        // choose one on the same rack as one of used
+        // nodes, remaining chosen
+        // are on different racks.
+        for (int i = 0; i < usedNodesCount; i++) {
+          for (int j = i + 1; j < usedNodesCount; j++) {
+            if (networkTopology.isSameParent(
+                usedNodes.get(i), usedNodes.get(j))) {
+              // choose remaining nodes on different racks
+              mutableExcludedNodes.addAll(mutableUsedNodes);
+              return chooseNodes(mutableExcludedNodes, chosenNodes,
+                  mutableFavoredNodes, favorIndex, nodesRequired,
+                  metadataSizeRequired, dataSizeRequired);
+            }
+          }
+        }
+        // choose one data on the same rack with one used node
+        favoredNode = favoredNodeNum > favorIndex ?
+            mutableFavoredNodes.get(favorIndex) : null;
+        DatanodeDetails secondNode;
+        if (favoredNode != null && networkTopology.isSameParent(
+            chosenNodes.get(0), favoredNode)) {
+          secondNode = favoredNode;
+          favorIndex++;
+        } else {
+          secondNode =
+              chooseNode(mutableExcludedNodes, mutableUsedNodes,
+                  metadataSizeRequired, dataSizeRequired);
+        }
+        chosenNodes.add(secondNode);
+        mutableExcludedNodes.add(secondNode);
+        mutableExcludedNodes.addAll(mutableUsedNodes);
+        nodesRequired--;
+        if (nodesRequired == 0) {
+          return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+        }
+      }
+    }
+    // choose remaining nodes on different racks
+    return chooseNodes(mutableExcludedNodes, chosenNodes, mutableFavoredNodes,

Review Comment:
   Perhaps, we could change the signature of this:
   
   ```
     private DatanodeDetails chooseNode(List<DatanodeDetails> excludedNodes,
         List<DatanodeDetails> affinityNodes, long metadataSizeRequired,
         long dataSizeRequired) throws SCMException {
   ```
   
   To be
   
   ```
     private DatanodeDetails chooseNode(List<DatanodeDetails> excludedNodes,
         List<DatanodeDetails> usedNodes, long metadataSizeRequired,
         long dataSizeRequired, boolean selectSameRackAsUsedNodes) throws 
SCMException {
   ```
   
   Then if sameRackAsUsedNodes is true, the code works as currently. If it is 
false, we add excludedNodes to the excludeNodesForCapacity list, and pass 
usedNodes as excluded into the topology. Do you think that would work?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to