[ 
https://issues.apache.org/jira/browse/HDFS-14181?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sihai Ke updated HDFS-14181:
----------------------------
    Description: 
While reading Hadoop's NetworkTopology.java, I suspect there is a bug in the 
function 

chooseRandom (line 498, Hadoop version 2.9.2-RC0). 

 I think the bug is in the {color:#f79232}highlighted code: ~excludedScope does 
not mean the available nodes under the scope node. I also added a unit test for 
this and got an exception.{color}

protected Node chooseRandom(final String scope, String excludedScope,
 final Collection<Node> excludedNodes) {
 if (excludedScope != null) {
 if (scope.startsWith(excludedScope)) \{ return null; }
 if (!excludedScope.startsWith(scope)) \{ excludedScope = null; }
 }
 Node node = getNode(scope);
 if (!(node instanceof InnerNode)) \{ return excludedNodes != null && 
excludedNodes.contains(node) ? null : node; }
 InnerNode innerNode = (InnerNode)node;
 int numOfDatanodes = innerNode.getNumOfLeaves();
 if (excludedScope == null) \{ node = null; } else {
 node = getNode(excludedScope);
 if (!(node instanceof InnerNode)) \{ numOfDatanodes -= 1; } else \{ 
numOfDatanodes -= ((InnerNode)node).getNumOfLeaves(); }
 }
 if (numOfDatanodes <= 0) {
 LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
 + " numOfDatanodes={}",
 scope, excludedScope, numOfDatanodes);
 return null;
 }
 final int availableNodes;
 if (excludedScope == null) \{ availableNodes = countNumOfAvailableNodes(scope, 
excludedNodes); } else {
 availableNodes =
 {color:#f79232} countNumOfAvailableNodes("~" + excludedScope, 
excludedNodes);{color}
 }
 LOG.debug("Choosing random from {} available nodes on node {},"
 + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
 availableNodes, innerNode, scope, excludedScope, excludedNodes,
 numOfDatanodes);
 Node ret = null;
 if (availableNodes > 0)

 

{ ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes, 
availableNodes); }

LOG.debug("chooseRandom returning {}", ret);
 return ret;
 }

 

I added the following unit test to TestClusterTopology.java, but it throws an exception.
{quote}@Test
public void testChooseRandom1() {
 // create the topology
 NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
 NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
 cluster.add(node1);
 NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
 cluster.add(node2);
 NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
 cluster.add(node3);
 NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
 cluster.add(node4);

 Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
 assertSame(node.getName(), "node3");
}{quote}
 


 {color:#f79232}!image-2018-12-29-15-02-19-415.png!{color}

 

 

[~vagarychen] this change was introduced in HDFS-11577; could you help check 
whether this is a bug?

 

  was:
During reading the hadoop NetworkTopology.java, I suspect there is a bug in 
function 

chooseRandom (line 498, hadoop version 2.9.2-RC0), 

 I think there is a bug in{color:#f79232} code,{color:#333333} ~excludedScope 
doesn't mean availableNodes under Scope node, and I also add unit test for this 
and get an exception.{color}{color}
{quote}{color:#f79232}{color:#333333}protected Node chooseRandom(final String 
scope, String excludedScope,
 final Collection<Node> excludedNodes) {
 if (excludedScope != null) {
 if (scope.startsWith(excludedScope)) {
 return null;
 }
 if (!excludedScope.startsWith(scope)) {
 excludedScope = null;
 }
 }
 Node node = getNode(scope);
 if (!(node instanceof InnerNode)) {
 return excludedNodes != null && excludedNodes.contains(node) ?
 null : node;
 }
 InnerNode innerNode = (InnerNode)node;
 int numOfDatanodes = innerNode.getNumOfLeaves();
 if (excludedScope == null) {
 node = null;
 } else {
 node = getNode(excludedScope);
 if (!(node instanceof InnerNode)) {
 numOfDatanodes -= 1;
 } else {
 numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
 }
 }
 if (numOfDatanodes <= 0) {
 LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
 + " numOfDatanodes={}",
 scope, excludedScope, numOfDatanodes);
 return null;
 }
 final int availableNodes;
 if (excludedScope == null) {
 availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
 } else {
 {color:#f79232}availableNodes ={color}
{color:#f79232} countNumOfAvailableNodes("~" + excludedScope, 
excludedNodes);{color}
 }
 LOG.debug("Choosing random from {} available nodes on node {},"
 + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
 availableNodes, innerNode, scope, excludedScope, excludedNodes,
 numOfDatanodes);
 Node ret = null;
 if (availableNodes > 0) {
 ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
 availableNodes);
 }
 LOG.debug("chooseRandom returning {}", ret);
 return ret;
}{color}{color}
{quote}
 

{color:#f79232}{color:#333333}Add Unit Test in TestClusterTopology.java, but 
get exception.{color}{color}
{quote}public void testChooseRandom1() {
 // create the topology
 NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
 NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
 cluster.add(node1);
 NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
 cluster.add(node2);
 NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
 cluster.add(node3);
 NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
 cluster.add(node4);

 Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
 assertSame(node.getName(), "node3");
}{quote}
{color:#f79232}{color:#333333}!image-2018-12-29-15-02-19-415.png!{color}{color}

 

[~vagarychen] this change is imported in PR HDFS-11577, could you help to check 
whether this is a bug ?

 


> Suspect there is a bug in NetworkTopology.java chooseRandom function.
> ---------------------------------------------------------------------
>
>                 Key: HDFS-14181
>                 URL: https://issues.apache.org/jira/browse/HDFS-14181
>             Project: Hadoop HDFS
>          Issue Type: Bug
>          Components: hdfs, namenode
>    Affects Versions: 2.9.2
>            Reporter: Sihai Ke
>            Priority: Major
>         Attachments: image-2018-12-29-15-02-19-415.png
>
>
> While reading Hadoop's NetworkTopology.java, I suspect there is a bug in the 
> function 
> chooseRandom (line 498, Hadoop version 2.9.2-RC0). 
>  I think the bug is in the {color:#f79232}highlighted code: ~excludedScope does 
> not mean the available nodes under the scope node. I also added a unit test for 
> this and got an exception.{color}
> protected Node chooseRandom(final String scope, String excludedScope,
>  final Collection<Node> excludedNodes) {
>  if (excludedScope != null) {
>  if (scope.startsWith(excludedScope)) \{ return null; }
>  if (!excludedScope.startsWith(scope)) \{ excludedScope = null; }
>  }
>  Node node = getNode(scope);
>  if (!(node instanceof InnerNode)) \{ return excludedNodes != null && 
> excludedNodes.contains(node) ? null : node; }
>  InnerNode innerNode = (InnerNode)node;
>  int numOfDatanodes = innerNode.getNumOfLeaves();
>  if (excludedScope == null) \{ node = null; } else {
>  node = getNode(excludedScope);
>  if (!(node instanceof InnerNode)) \{ numOfDatanodes -= 1; } else \{ 
> numOfDatanodes -= ((InnerNode)node).getNumOfLeaves(); }
>  }
>  if (numOfDatanodes <= 0) {
>  LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
>  + " numOfDatanodes={}",
>  scope, excludedScope, numOfDatanodes);
>  return null;
>  }
>  final int availableNodes;
>  if (excludedScope == null) \{ availableNodes = 
> countNumOfAvailableNodes(scope, excludedNodes); } else {
>  availableNodes =
>  {color:#f79232} countNumOfAvailableNodes("~" + excludedScope, 
> excludedNodes);{color}
>  }
>  LOG.debug("Choosing random from {} available nodes on node {},"
>  + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
>  availableNodes, innerNode, scope, excludedScope, excludedNodes,
>  numOfDatanodes);
>  Node ret = null;
>  if (availableNodes > 0)
>  
> { ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes, 
> availableNodes); }
> LOG.debug("chooseRandom returning {}", ret);
>  return ret;
>  }
>  
> I added the following unit test to TestClusterTopology.java, but it throws an exception.
> {quote}@Test
> public void testChooseRandom1() {
>  // create the topology
>  NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
>  NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
>  cluster.add(node1);
>  NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
>  cluster.add(node2);
>  NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
>  cluster.add(node3);
>  NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
>  cluster.add(node4);
>  Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
>  assertSame(node.getName(), "node3");
> }{quote}
>  
>  {color:#f79232}!image-2018-12-29-15-02-19-415.png!{color}
>  
>  
> [~vagarychen] this change was introduced in HDFS-11577; could you help 
> check whether this is a bug?
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to