Author: cutting Date: Mon Apr 16 13:35:23 2007 New Revision: 529389 URL: http://svn.apache.org/viewvc?view=rev&rev=529389 Log: HADOOP-1155. Improve NetworkTopology's algorithm for finding nearby nodes. Contributed by Hairong Kuang.
Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=529389&r1=529388&r2=529389 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Mon Apr 16 13:35:23 2007 @@ -210,6 +210,9 @@ 63. HADOOP-1258. Fix TestCheckpoint test case to wait for MiniDFSCluster to be active. (Nigel Daley via tomwhite) +64. HADOOP-1155. Improve NetworkTopology's algorithm for finding + nearby nodes, used by HDFS. (Hairong Kuang via cutting) + Release 0.12.3 - 2007-04-06 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=529389&r1=529388&r2=529389 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Apr 16 13:35:23 2007 @@ -626,7 +626,7 @@ blocksMap.nodeIterator( blocks[i] ); it.hasNext(); ) { machineSets[i][ numNodes++ ] = it.next(); } - clusterMap.sortByDistance( client, machineSets[i] ); + clusterMap.pseudoSortByDistance( client, machineSets[i] ); } } @@ -3120,13 +3120,13 @@ throws NotEnoughReplicasException { DatanodeDescriptor result; do { - DatanodeDescriptor[] selectedNodes = + List<DatanodeDescriptor> selectedNodes = chooseRandom(1, nodes, excludedNodes); - if(selectedNodes.length == 0 ) { + if(selectedNodes.size() == 0 ) { throw new NotEnoughReplicasException( "Not able to place enough replicas" ); } - result = (DatanodeDescriptor)(selectedNodes[0]); + result = (DatanodeDescriptor)(selectedNodes.get(0)); } while( !isGoodTarget( result, blocksize, maxNodesPerRack, results)); results.add(result); return result; @@ -3143,13 +3143,13 @@ throws NotEnoughReplicasException { boolean toContinue = true; do { - DatanodeDescriptor[] selectedNodes = + List<DatanodeDescriptor> selectedNodes = chooseRandom(numOfReplicas, nodes, excludedNodes); - if(selectedNodes.length < numOfReplicas) { + if(selectedNodes.size() < numOfReplicas) { toContinue = false; } - for(int i=0; i<selectedNodes.length; i++) { - DatanodeDescriptor result = (DatanodeDescriptor)(selectedNodes[i]); + for(int i=0; i<selectedNodes.size(); i++) { + DatanodeDescriptor result = (DatanodeDescriptor)(selectedNodes.get(i)); if( isGoodTarget( result, blocksize, maxNodesPerRack, results)) { numOfReplicas--; results.add(result); @@ -3166,7 +3166,7 @@ /* Randomly choose <i>numOfNodes</i> nodes from <i>scope</i>. * @return the choosen nodes */ - private DatanodeDescriptor[] chooseRandom(int numOfReplicas, + private List<DatanodeDescriptor> chooseRandom(int numOfReplicas, String nodes, List<DatanodeDescriptor> excludedNodes) { List<DatanodeDescriptor> results = @@ -3183,8 +3183,7 @@ numOfReplicas--; } } - return (DatanodeDescriptor[])results.toArray( - new DatanodeDescriptor[results.size()]); + return results; } /* judge if a node is a good target. Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java?view=diff&rev=529389&r1=529388&r2=529389 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java Mon Apr 16 13:35:23 2007 @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.Comparator; import java.util.List; import java.util.Random; @@ -300,6 +301,7 @@ InnerNode clusterMap = new InnerNode( InnerNode.ROOT ); // the root private int numOfRacks = 0; // rack counter + private HashMap<String, Node> rackMap = new HashMap<String, Node>(); public NetworkTopology() { } @@ -322,6 +324,7 @@ if( clusterMap.add( node) ) { if( rack == null ) { numOfRacks++; + rackMap.put(node.getNetworkLocation(), node.getParent()); } } LOG.debug("NetworkTopology became:\n" + this.toString()); @@ -339,6 +342,7 @@ InnerNode rack = (InnerNode)getNode(node.getNetworkLocation()); if(rack == null) { numOfRacks--; + rackMap.remove(node.getNetworkLocation()); } } LOG.debug("NetworkTopology became:\n" + this.toString()); @@ -369,6 +373,12 @@ */ public synchronized Node getNode( String loc ) { loc = NodeBase.normalize(loc); + // optimize searching rack node by looking up the rackMap + Node node = rackMap.get(loc); + if( node != null ) { + return node; + } + // otherwise slower search if(!NodeBase.ROOT.equals(loc)) loc = loc.substring(1); return clusterMap.getLoc( loc ); @@ -440,11 +450,7 @@ return false; } - if( node1 == node2 || node1.equals(node2)) { - return true; - } - - return node1.getParent()==node2.getParent(); + return node1.getParent()==node2.getParent(); } final private static Random r = new Random(); @@ -546,24 +552,47 @@ return tree.toString(); } - /* Set and used only inside sortByDistance. - * This saves an allocation each time we sort. + /** Sort nodes array by their distances to <i>reader</i> + * It linearly scans the array, if a local node is found, swap it with + * the first element of the array. + * If a local rack node is found, swap it with the first element following + * the local node. + * It leaves the rest nodes untouched. */ - private DatanodeDescriptor distFrom = null; - private final Comparator<DatanodeDescriptor> nodeDistanceComparator = - new Comparator<DatanodeDescriptor>() { - public int compare(DatanodeDescriptor n1, DatanodeDescriptor n2) { - return getDistance(distFrom, n1) - getDistance(distFrom, n2); + public synchronized void pseudoSortByDistance( + DatanodeDescriptor reader, DatanodeDescriptor[] nodes ) { + if (reader == null ) return; // no need to sort + + DatanodeDescriptor tempNode; + int tempIndex = 0; + int localRackNode = -1; + //scan the array to find the local node & local rack node + for(int i=0; i<nodes.length; i++) { + if(tempIndex == 0 && reader == nodes[i]) { //local node + //swap the local node and the node at position 0 + if( i != 0 ) { + tempNode = nodes[tempIndex]; + nodes[tempIndex] = nodes[i]; + nodes[i] = tempNode; + } + tempIndex=1; + if(localRackNode != -1 ) { + if(localRackNode == 0) { + localRackNode = i; + } + break; + } + } else if(localRackNode == -1 && isOnSameRack(reader, nodes[i])) { //local rack + localRackNode = i; + if(tempIndex != 0 ) break; } - }; + } - /** Sorts nodes array by their distances to <i>reader</i>. */ - public synchronized void sortByDistance( final DatanodeDescriptor reader, - DatanodeDescriptor[] nodes ) { - if(reader != null && contains(reader)) { - distFrom = reader; - Arrays.sort( nodes, nodeDistanceComparator ); - distFrom = null; + // swap the local rack node and the node at position tempIndex + if(localRackNode != -1 && localRackNode != tempIndex ) { + tempNode = nodes[tempIndex]; + nodes[tempIndex] = nodes[localRackNode]; + nodes[localRackNode] = tempNode; } } } Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java?view=diff&rev=529389&r1=529388&r2=529389 ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java (original) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java Mon Apr 16 13:35:23 2007 @@ -25,7 +25,7 @@ } } - public void testContains() { + public void testContains() throws Exception { for(int i=0; i<dataNodes.length; i++) { assertTrue( cluster.contains(dataNodes[i])); } @@ -53,6 +53,46 @@ assertEquals(cluster.getDistance(dataNodes[0], dataNodes[6]), 6); } + public void testPseudoSortByDistance() throws Exception { + DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3]; + + // array contains both local node & local rack node + testNodes[0] = dataNodes[1]; + testNodes[1] = dataNodes[2]; + testNodes[2] = dataNodes[0]; + cluster.pseudoSortByDistance(dataNodes[0], testNodes ); + assertTrue(testNodes[0] == dataNodes[0]); + assertTrue(testNodes[1] == dataNodes[1]); + assertTrue(testNodes[2] == dataNodes[2]); + + // array contains local node + testNodes[0] = dataNodes[1]; + testNodes[1] = dataNodes[3]; + testNodes[2] = dataNodes[0]; + cluster.pseudoSortByDistance(dataNodes[0], testNodes ); + assertTrue(testNodes[0] == dataNodes[0]); + assertTrue(testNodes[1] == dataNodes[1]); + assertTrue(testNodes[2] == dataNodes[3]); + + // array contains local rack node + testNodes[0] = dataNodes[5]; + testNodes[1] = dataNodes[3]; + testNodes[2] = dataNodes[1]; + cluster.pseudoSortByDistance(dataNodes[0], testNodes ); + assertTrue(testNodes[0] == dataNodes[1]); + assertTrue(testNodes[1] == dataNodes[3]); + assertTrue(testNodes[2] == dataNodes[5]); + + // array contains neither local node & local rack node + testNodes[0] = dataNodes[5]; + testNodes[1] = dataNodes[3]; + testNodes[2] = dataNodes[2]; + cluster.pseudoSortByDistance(dataNodes[0], testNodes ); + assertTrue(testNodes[0] == dataNodes[5]); + assertTrue(testNodes[1] == dataNodes[3]); + assertTrue(testNodes[2] == dataNodes[2]); + } + public void testRemove() throws Exception { for(int i=0; i<dataNodes.length; i++) { cluster.remove( dataNodes[i] );