[GitHub] [hadoop] xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default pipeline placement policy implementation.

2019-09-03 Thread GitBox
xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default 
pipeline placement policy implementation.
URL: https://github.com/apache/hadoop/pull/1366#discussion_r320404457
 
 

 ##
 File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
 ##
 @@ -0,0 +1,237 @@
+package org.apache.hadoop.hdds.scm.pipeline;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import 
org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMCommonPolicy;
+import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
+import org.apache.hadoop.hdds.scm.exceptions.SCMException;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.net.Node;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Pipeline placement policy that choose datanodes based on load balancing and 
network topology
+ * to supply pipeline creation.
+ * 
+ * 1. get a list of healthy nodes
+ * 2. filter out viable nodes that either don't have enough size left
+ *or are too heavily engaged in other pipelines
+ * 3. Choose an anchor node among the viable nodes which follows the algorithm
+ *described @SCMContainerPlacementCapacity
+ * 4. Choose other nodes around the anchor node based on network topology
+ */
+public final class PipelinePlacementPolicy extends SCMCommonPolicy {
 
 Review comment:
   Sounds good to me. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default pipeline placement policy implementation.

2019-08-28 Thread GitBox
xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default 
pipeline placement policy implementation.
URL: https://github.com/apache/hadoop/pull/1366#discussion_r318876175
 
 

 ##
 File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
 ##
 @@ -0,0 +1,237 @@
+package org.apache.hadoop.hdds.scm.pipeline;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import 
org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMCommonPolicy;
+import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
+import org.apache.hadoop.hdds.scm.exceptions.SCMException;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.net.Node;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Pipeline placement policy that choose datanodes based on load balancing and 
network topology
+ * to supply pipeline creation.
+ * 
+ * 1. get a list of healthy nodes
+ * 2. filter out viable nodes that either don't have enough size left
+ *or are too heavily engaged in other pipelines
+ * 3. Choose an anchor node among the viable nodes which follows the algorithm
+ *described @SCMContainerPlacementCapacity
+ * 4. Choose other nodes around the anchor node based on network topology
+ */
+public final class PipelinePlacementPolicy extends SCMCommonPolicy {
+@VisibleForTesting
+static final Logger LOG =
+LoggerFactory.getLogger(PipelinePlacementPolicy.class);
+private final NodeManager nodeManager;
+private final Configuration conf;
+private final int heavy_node_criteria;
+
+/**
+ * Constructs a Container Placement with considering only capacity.
+ * That is this policy tries to place containers based on node weight.
+ *
+ * @param nodeManager Node Manager
+ * @param conf Configuration
+ */
+public PipelinePlacementPolicy(final NodeManager nodeManager,
+   final Configuration conf) {
+super(nodeManager, conf);
+this.nodeManager = nodeManager;
+this.conf = conf;
+heavy_node_criteria = 
conf.getInt(ScmConfigKeys.OZONE_SCM_DATANODE_MAX_PIPELINE_ENGAGEMENT,
+
ScmConfigKeys.OZONE_SCM_DATANODE_MAX_PIPELINE_ENGAGEMENT_DEFAULT);
+}
+
+/**
+ * Returns true if this node meets the criteria.
+ *
+ * @param datanodeDetails DatanodeDetails
+ * @return true if we have enough space.
+ */
+boolean meetCriteria(DatanodeDetails datanodeDetails,
+   long sizeRequired) {
+SCMNodeMetric nodeMetric = nodeManager.getNodeStat(datanodeDetails);
+boolean hasEnoughSpace = (nodeMetric != null) && (nodeMetric.get() != 
null)
+&& nodeMetric.get().getRemaining().hasResources(sizeRequired);
+boolean loadNotTooHeavy = 
nodeManager.getPipelinesCount(datanodeDetails) <= heavy_node_criteria;
+return hasEnoughSpace && loadNotTooHeavy;
+}
+
+/**
+ * Filter out viable nodes based on
+ * 1. nodes that are healthy
+ * 2. nodes that have enough space
+ * 3. nodes that are not too heavily engaged in other pipelines
+ * @param excludedNodes - excluded nodes
+ * @param nodesRequired - number of datanodes required.
+ * @param sizeRequired - size required for the container or block.
+ * @return a list of viable nodes
+ * @throws SCMException when viable nodes are not enough in numbers
+ */
+List filterViableNodes(List 
excludedNodes,
+int nodesRequired, final long 
sizeRequired) throws SCMException {
+// get nodes in HEALTHY state
+List healthyNodes =
+nodeManager.getNodes(HddsProtos.NodeState.HEALTHY);
+if (excludedNodes != null) {
+healthyNodes.removeAll(excludedNodes);
+}
+String msg;
+if (healthyNodes.size() == 0) {
+msg = "No healthy node found to allocate container.";
+LOG.error(msg);
+throw new SCMException(msg, SCMException.ResultCodes
+.FAILED_TO_FIND_HEALTHY_NODES);
+}
+
+if (healthyNodes.size() < nodesRequired) {
+msg = String.format("Not enough healthy nodes to allocate 
container. %d "
++ " datanodes required. Found %d",
+nodesRequired, healthyNodes.size());
+LOG.error(msg);
+throw new SCMException(msg,
+

[GitHub] [hadoop] xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default pipeline placement policy implementation.

2019-08-28 Thread GitBox
xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default 
pipeline placement policy implementation.
URL: https://github.com/apache/hadoop/pull/1366#discussion_r318874570
 
 

 ##
 File path: 
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
 ##
 @@ -329,6 +329,10 @@
   "ozone.scm.pipeline.owner.container.count";
   public static final int OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT = 3;
 
+  public static final String OZONE_SCM_DATANODE_MAX_PIPELINE_ENGAGEMENT =
 
 Review comment:
   Can we add some comments for this key and the recommended values? 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default pipeline placement policy implementation.

2019-08-28 Thread GitBox
xiaoyuyao commented on a change in pull request #1366: HDDS-1577. Add default 
pipeline placement policy implementation.
URL: https://github.com/apache/hadoop/pull/1366#discussion_r318874255
 
 

 ##
 File path: 
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
 ##
 @@ -0,0 +1,237 @@
+package org.apache.hadoop.hdds.scm.pipeline;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import 
org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMCommonPolicy;
+import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
+import org.apache.hadoop.hdds.scm.exceptions.SCMException;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.net.Node;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Pipeline placement policy that choose datanodes based on load balancing and 
network topology
+ * to supply pipeline creation.
+ * 
+ * 1. get a list of healthy nodes
+ * 2. filter out viable nodes that either don't have enough size left
+ *or are too heavily engaged in other pipelines
+ * 3. Choose an anchor node among the viable nodes which follows the algorithm
+ *described @SCMContainerPlacementCapacity
+ * 4. Choose other nodes around the anchor node based on network topology
+ */
+public final class PipelinePlacementPolicy extends SCMCommonPolicy {
 
 Review comment:
   This is not an issue specific to this patch. But I think the class hierarchy 
needs some adjustment. Currently:
   PipelinePlacementPolicy<-SCMCommonPolicy<-ContainerPlacementPolicy
   
   Should we change to have SCMCommonPolicy as the base for both 
PipelinePlacementPolicy and ContainerPlacementPolicy? If there are common 
pieces between PipelinePlacement and ContainerPlacement, we can move them 
to SCMCommonPolicy.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org