Author: dhruba
Date: Thu Nov 29 06:01:46 2007
New Revision: 599447

URL: http://svn.apache.org/viewvc?rev=599447&view=rev
Log:
HADOOP-2256. Fix a buf in the namenode that could cause it to encounter
an infinite loop while deleting excess replicas that were created by
block rebalancing.  (Hairong Kuang via dhruba)


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestBlockReplacement.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=599447&r1=599446&r2=599447&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Nov 29 06:01:46 2007
@@ -165,6 +165,10 @@
     fails to allocate any datanodes for newly allocated block.
     (Dhruba Borthakur via dhruba)
 
+    HADOOP-2256. Fix a buf in the namenode that could cause it to encounter
+    an infinite loop while deleting excess replicas that were created by 
+    block rebalancing.  (Hairong Kuang via dhruba)
+
 Branch 0.15 (unreleased)
 
   BUG FIXES

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=599447&r1=599446&r2=599447&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java 
(original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Thu 
Nov 29 06:01:46 2007
@@ -2431,13 +2431,14 @@
     // pick one node to delete that favors the delete hint
     // otherwise pick one with least space from priSet if it is not empty
     // otherwise one node with least space from remains
+    boolean firstOne = true;
     while (nonExcess.size() - replication > 0) {
       DatanodeInfo cur = null;
       long minSpace = Long.MAX_VALUE;
 
       // check if we can del delNodeHint
-      if( delNodeHint !=null && (priSet.contains(delNodeHint) ||
-          (addedNode != null && !priSet.contains(addedNode))) ) {
+      if (firstOne && delNodeHint !=null && nonExcess.contains(delNodeHint) &&
+            (priSet.contains(delNodeHint) || (addedNode != null && 
!priSet.contains(addedNode))) ) {
           cur = delNodeHint;
       } else { // regular excessive replica removal
         Iterator<DatanodeDescriptor> iter = 
@@ -2453,6 +2454,7 @@
           }
       }
 
+      firstOne = false;
       // adjust rackmap, priSet, and remains
       String rack = cur.getNetworkLocation();
       ArrayList<DatanodeDescriptor> datanodes = rackMap.get(rack);

Modified: 
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestBlockReplacement.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestBlockReplacement.java?rev=599447&r1=599446&r2=599447&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestBlockReplacement.java 
(original)
+++ 
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestBlockReplacement.java 
Thu Nov 29 06:01:46 2007
@@ -26,6 +26,8 @@
 import java.util.List;
 import java.util.Random;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.dfs.FSConstants.DatanodeReportType;
 import org.apache.hadoop.fs.FileSystem;
@@ -37,6 +39,9 @@
  * This class tests if block replacement request to data nodes work correctly.
  */
 public class TestBlockReplacement extends TestCase {
+  private static final Log LOG = LogFactory.getLog(
+  "org.apache.hadoop.dfs.TestBlockReplacement");
+
   MiniDFSCluster cluster;
   public void testThrottler() throws IOException {
     Configuration conf = new Configuration();
@@ -136,15 +141,22 @@
       
       // start to replace the block
       // case 1: proxySource does not contain the block
+      LOG.info("Testcase 1: Proxy " + newNode.getName() 
+          + "does not contain the block " + b.getBlockName() );
       assertFalse(replaceBlock(b, source, newNode, proxies.get(0)));
       // case 2: destination contains the block
+      LOG.info("Testcase 2: Destination " + proxies.get(1).getName() 
+          + "contains the block " + b.getBlockName() );
       assertFalse(replaceBlock(b, source, proxies.get(0), proxies.get(1)));
       // case 3: correct case
+      LOG.info("Testcase 3: Proxy=" + source.getName() + "source=" + 
+          proxies.get(0).getName() + " destination=" + newNode.getName() );
       assertTrue(replaceBlock(b, source, proxies.get(0), newNode));
       // block locations should contain two proxies and newNode
       checkBlocks(source, fileName.toString(), 
           DEFAULT_BLOCK_SIZE, REPLICATION_FACTOR);
       // case 4: proxies.get(1) is not a valid del hint
+      LOG.info("Testcase 3: invalid del hint " + proxies.get(0).getName() );
       assertTrue(replaceBlock(b, proxies.get(1), proxies.get(0), source));
       /* block locations should contain two proxies and source;
        * newNode was deleted.
@@ -161,22 +173,26 @@
       long fileLen, short replFactor) throws IOException {
     Boolean notDone;
     do {
+      try {
+        Thread.sleep(100);
+      } catch(InterruptedException e) {
+      }
       List<LocatedBlock> blocks = cluster.getNameNode().
       getBlockLocations(fileName, 0, fileLen).getLocatedBlocks();
       assertEquals(1, blocks.size());
       DatanodeInfo[] nodes = blocks.get(0).getLocations();
       notDone = (nodes.length != replFactor);
-      if(!notDone) {
-      for(DatanodeInfo node:nodes) {
-        if(node.equals(excludedNode) ) {
-          notDone=true; 
-          try {
-            Thread.sleep(100);
-          } catch(InterruptedException e) {
+      if (notDone) {
+        LOG.info("Expected replication factor is " + replFactor +
+            " but the real replication factor is " + nodes.length );
+      } else {
+        for( DatanodeInfo node : nodes) {
+          if (node.equals(excludedNode) ) {
+            notDone=true; 
+            LOG.info("Unexpected block location " + excludedNode.getName() );
+            break;
           }
-          break;
         }
-      }
       }
     } while(notDone);
   }


Reply via email to