[
https://issues.apache.org/jira/browse/HDFS-14181?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sihai Ke updated HDFS-14181:
----------------------------
Description:
While reading Hadoop's NetworkTopology.java, I suspect there is a bug in the
function chooseRandom (line 498, Hadoop version 2.9.2-RC0).
{color:#f79232}I think the bug is that counting available nodes with
"~" + excludedScope does not give the number of available nodes under the
scope node. I also added a unit test for this, and it throws an
exception.{color}
The suspected buggy code is in the else branch:
{code:java}
// code placeholder
if (excludedScope == null) {
availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
} else {
availableNodes =
countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
}{code}
Source code:
{code:java}
// code placeholder
protected Node chooseRandom(final String scope, String excludedScope,
final Collection<Node> excludedNodes) {
if (excludedScope != null) {
if (scope.startsWith(excludedScope)) {
return null;
}
if (!excludedScope.startsWith(scope)) {
excludedScope = null;
}
}
Node node = getNode(scope);
if (!(node instanceof InnerNode)) {
return excludedNodes != null && excludedNodes.contains(node) ?
null : node;
}
InnerNode innerNode = (InnerNode)node;
int numOfDatanodes = innerNode.getNumOfLeaves();
if (excludedScope == null) {
node = null;
} else {
node = getNode(excludedScope);
if (!(node instanceof InnerNode)) {
numOfDatanodes -= 1;
} else {
numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
}
}
if (numOfDatanodes <= 0) {
LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
+ " numOfDatanodes={}",
scope, excludedScope, numOfDatanodes);
return null;
}
final int availableNodes;
if (excludedScope == null) {
availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
} else {
availableNodes =
countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
}
LOG.debug("Choosing random from {} available nodes on node {},"
+ " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
availableNodes, innerNode, scope, excludedScope, excludedNodes,
numOfDatanodes);
Node ret = null;
if (availableNodes > 0) {
ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
availableNodes);
}
LOG.debug("chooseRandom returning {}", ret);
return ret;
}
{code}
I added the following unit test to TestClusterTopology.java, but it throws an exception:
{code:java}
// code placeholder
@Test
public void testChooseRandom1() {
// create the topology
NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
cluster.add(node1);
NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
cluster.add(node2);
NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
cluster.add(node3);
NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
cluster.add(node4);
Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
assertSame(node.getName(), "node3");
}
{code}
Exception:
{code:java}
// code placeholder
java.lang.IllegalArgumentException: 1 should >= 2, and both should be positive.
at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88)
at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:567)
at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:544)
at org.apache.hadoop.net.TestClusterTopology.testChooseRandom1(TestClusterTopology.java:198)
{code}
{color:#f79232}!image-2018-12-29-15-02-19-415.png!{color}
[~vagarychen] this change was introduced in HDFS-11577; could you help check
whether this is a bug?
was:
While reading Hadoop's NetworkTopology.java, I suspect there is a bug in the
function chooseRandom (line 498, Hadoop version 2.9.2-RC0).
{color:#f79232}I think the bug is that counting available nodes with
"~" + excludedScope does not give the number of available nodes under the
scope node. I also added a unit test for this, and it throws an
exception.{color}
{code:java}
// code placeholder
protected Node chooseRandom(final String scope, String excludedScope,
final Collection<Node> excludedNodes) {
if (excludedScope != null) {
if (scope.startsWith(excludedScope)) {
return null;
}
if (!excludedScope.startsWith(scope)) {
excludedScope = null;
}
}
Node node = getNode(scope);
if (!(node instanceof InnerNode)) {
return excludedNodes != null && excludedNodes.contains(node) ?
null : node;
}
InnerNode innerNode = (InnerNode)node;
int numOfDatanodes = innerNode.getNumOfLeaves();
if (excludedScope == null) {
node = null;
} else {
node = getNode(excludedScope);
if (!(node instanceof InnerNode)) {
numOfDatanodes -= 1;
} else {
numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
}
}
if (numOfDatanodes <= 0) {
LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
+ " numOfDatanodes={}",
scope, excludedScope, numOfDatanodes);
return null;
}
final int availableNodes;
if (excludedScope == null) {
availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
} else {
availableNodes =
countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
}
LOG.debug("Choosing random from {} available nodes on node {},"
+ " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
availableNodes, innerNode, scope, excludedScope, excludedNodes,
numOfDatanodes);
Node ret = null;
if (availableNodes > 0) {
ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
availableNodes);
}
LOG.debug("chooseRandom returning {}", ret);
return ret;
}
{code}
I added the following unit test to TestClusterTopology.java, but it throws an exception:
{code:java}
// code placeholder
@Test
public void testChooseRandom1() {
// create the topology
NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
cluster.add(node1);
NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
cluster.add(node2);
NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
cluster.add(node3);
NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
cluster.add(node4);
Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
assertSame(node.getName(), "node3");
}
{code}
Exception:
{code:java}
// code placeholder
java.lang.IllegalArgumentException: 1 should >= 2, and both should be positive.
at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88)
at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:567)
at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:544)
at org.apache.hadoop.net.TestClusterTopology.testChooseRandom1(TestClusterTopology.java:198)
{code}
{color:#f79232}!image-2018-12-29-15-02-19-415.png!{color}
[~vagarychen] this change was introduced in HDFS-11577; could you help check
whether this is a bug?
> Suspect there is a bug in NetworkTopology.java chooseRandom function.
> ---------------------------------------------------------------------
>
> Key: HDFS-14181
> URL: https://issues.apache.org/jira/browse/HDFS-14181
> Project: Hadoop HDFS
> Issue Type: Bug
> Components: hdfs, namenode
> Affects Versions: 2.9.2
> Reporter: Sihai Ke
> Priority: Major
> Attachments: image-2018-12-29-15-02-19-415.png
>
>
> While reading Hadoop's NetworkTopology.java, I suspect there is a bug in the
> function chooseRandom (line 498, Hadoop version 2.9.2-RC0).
> {color:#f79232}I think the bug is that counting available nodes with
> "~" + excludedScope does not give the number of available nodes under the
> scope node. I also added a unit test for this, and it throws an
> exception.{color}
> The suspected buggy code is in the else branch:
> {code:java}
> // code placeholder
> if (excludedScope == null) {
> availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
> } else {
> availableNodes =
> countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
> }{code}
> Source code:
> {code:java}
> // code placeholder
> protected Node chooseRandom(final String scope, String excludedScope,
> final Collection<Node> excludedNodes) {
> if (excludedScope != null) {
> if (scope.startsWith(excludedScope)) {
> return null;
> }
> if (!excludedScope.startsWith(scope)) {
> excludedScope = null;
> }
> }
> Node node = getNode(scope);
> if (!(node instanceof InnerNode)) {
> return excludedNodes != null && excludedNodes.contains(node) ?
> null : node;
> }
> InnerNode innerNode = (InnerNode)node;
> int numOfDatanodes = innerNode.getNumOfLeaves();
> if (excludedScope == null) {
> node = null;
> } else {
> node = getNode(excludedScope);
> if (!(node instanceof InnerNode)) {
> numOfDatanodes -= 1;
> } else {
> numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
> }
> }
> if (numOfDatanodes <= 0) {
> LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
> + " numOfDatanodes={}",
> scope, excludedScope, numOfDatanodes);
> return null;
> }
> final int availableNodes;
> if (excludedScope == null) {
> availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
> } else {
> availableNodes =
> countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
> }
> LOG.debug("Choosing random from {} available nodes on node {},"
> + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
> availableNodes, innerNode, scope, excludedScope, excludedNodes,
> numOfDatanodes);
> Node ret = null;
> if (availableNodes > 0) {
> ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
> availableNodes);
> }
> LOG.debug("chooseRandom returning {}", ret);
> return ret;
> }
> {code}
>
>
> I added the following unit test to TestClusterTopology.java, but it throws an exception:
>
> {code:java}
> // code placeholder
> @Test
> public void testChooseRandom1() {
> // create the topology
> NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
> NodeElement node1 = getNewNode("node1", "/a1/b1/c1");
> cluster.add(node1);
> NodeElement node2 = getNewNode("node2", "/a1/b1/c1");
> cluster.add(node2);
> NodeElement node3 = getNewNode("node3", "/a1/b1/c2");
> cluster.add(node3);
> NodeElement node4 = getNewNode("node4", "/a1/b2/c3");
> cluster.add(node4);
> Node node = cluster.chooseRandom("/a1/b1", "/a1/b1/c1", null);
> assertSame(node.getName(), "node3");
> }
> {code}
>
> Exception:
> {code:java}
> // code placeholder
> java.lang.IllegalArgumentException: 1 should >= 2, and both should be positive.
> at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88)
> at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:567)
> at org.apache.hadoop.net.NetworkTopology.chooseRandom(NetworkTopology.java:544)
> at org.apache.hadoop.net.TestClusterTopology.testChooseRandom1(TestClusterTopology.java:198)
> {code}
>
> {color:#f79232}!image-2018-12-29-15-02-19-415.png!{color}
>
>
> [~vagarychen] this change was introduced in HDFS-11577; could you help
> check whether this is a bug?
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]