Author: szetszwo
Date: Mon Jun  3 07:15:38 2013
New Revision: 1488867

URL: http://svn.apache.org/r1488867
Log:
svn merge -c 1488865 from branch-1 for HDFS-4261. Fix bugs in Balancer causing
infinite loop and TestBalancerWithNodeGroup timing out.

Modified:
    hadoop/common/branches/branch-1-win/   (props changed)
    hadoop/common/branches/branch-1-win/CHANGES.txt   (contents, props changed)
    
hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
    
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java

Propchange: hadoop/common/branches/branch-1-win/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/branch-1:r1488865

Modified: hadoop/common/branches/branch-1-win/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/CHANGES.txt?rev=1488867&r1=1488866&r2=1488867&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1-win/CHANGES.txt Mon Jun  3 07:15:38 2013
@@ -95,6 +95,9 @@ Release 1.1.0 - (partial upto at r129350
 
     HDFS-3008. Negative caching of local addrs doesn't work. (eli)
 
+    HDFS-4261. Fix bugs in Balaner causing infinite loop and
+    TestBalancerWithNodeGroup timeing out.  (Junping Du via szetszwo)
+
   IMPROVEMENTS
 
     MAPREDUCE-3597. [Rumen] Provide a way to access other info of history file

Propchange: hadoop/common/branches/branch-1-win/CHANGES.txt
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/branch-1/CHANGES.txt:r1488865

Modified: 
hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java?rev=1488867&r1=1488866&r2=1488867&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
 (original)
+++ 
hadoop/common/branches/branch-1-win/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
 Mon Jun  3 07:15:38 2013
@@ -198,6 +198,8 @@ public class Balancer implements Tool {
    */
   public static final int MAX_NUM_CONCURRENT_MOVES = 5;
   
+  public static final int MAX_NO_PENDING_BLOCK_ITERATIONS = 5;
+  
   private Configuration conf;
 
   private double threshold = 10D;
@@ -753,6 +755,7 @@ public class Balancer implements Tool {
       long startTime = Util.now();
       this.blocksToReceive = 2*scheduledSize;
       boolean isTimeUp = false;
+      int noPendingBlockIteration = 0;
       while(!isTimeUp && scheduledSize > 0 &&
           (!srcBlockList.isEmpty() || blocksToReceive > 0)) {
         PendingBlockMove pendingBlock = chooseNextBlockToMove();
@@ -776,7 +779,15 @@ public class Balancer implements Tool {
             LOG.warn(StringUtils.stringifyException(e));
             return;
           }
-        } 
+        } else {
+          // source node cannot find a pendingBlockToMove, iteration +1
+          noPendingBlockIteration++;
+          // in case no blocks can be moved for source node's task,
+          // jump out of while-loop after 5 iterations.
+          if (noPendingBlockIteration >= MAX_NO_PENDING_BLOCK_ITERATIONS) {
+            scheduledSize = 0;
+          }
+        }
         
         // check if time is up or not
         if (Util.now()-startTime > MAX_ITERATION_TIME) {
@@ -1485,7 +1496,11 @@ public class Balancer implements Tool {
       Formatter formatter = new Formatter(System.out);
       System.out.println("Time Stamp               Iteration#  Bytes Already Moved  Bytes Left To Move  Bytes Being Moved");
       int iterations = 0;
+      
       while (true) {
+        // clean all lists at the beginning of balancer iteration.
+        resetData();
+
         /* get all live datanodes of a cluster and their disk usage
          * decide the number of bytes need to be moved
          */
@@ -1536,9 +1551,6 @@ public class Balancer implements Tool {
             return NO_MOVE_PROGRESS;
           }
         }
-
-        // clean all lists
-        resetData();
         
         try {
           Thread.sleep(2*conf.getLong("dfs.heartbeat.interval", 3));

Modified: 
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java?rev=1488867&r1=1488866&r2=1488867&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
 (original)
+++ 
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
 Mon Jun  3 07:15:38 2013
@@ -216,7 +216,7 @@ public class TestBalancerWithNodeGroup {
    * to n0 or n1 as balancer policy with node group. Thus, we expect the balancer
    * to end in 5 iterations without move block process.
    */
-  @Test
+  @Test(timeout=60000)
   public void testBalancerEndInNoMoveProgress() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY, CAPACITY, CAPACITY};
@@ -255,7 +255,7 @@ public class TestBalancerWithNodeGroup {
    * Create a cluster with even distribution, and a new empty node is added to
    * the cluster, then test rack locality for balancer policy. 
    */
-  @Test
+  @Test(timeout=60000)
   public void testBalancerWithRackLocality() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY};
@@ -294,7 +294,7 @@ public class TestBalancerWithNodeGroup {
       totalCapacity += newCapacity;
 
       // run balancer and validate results
-      runBalancer(conf, totalUsedSpace, totalCapacity);
+      runBalancerCanFinish(conf, totalUsedSpace, totalCapacity);
       
       DatanodeInfo[] datanodeReport = 
               client.getDatanodeReport(DatanodeReportType.ALL);
@@ -321,7 +321,7 @@ public class TestBalancerWithNodeGroup {
   /** Create a cluster with even distribution, and a new empty node is added to
    *  the cluster, then test rack locality for balancer policy. 
    **/
-  @Test
+  @Test(timeout=60000)
   public void testBalancerWithNodeGroup() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY};


Reply via email to