[3/3] hadoop git commit: HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak

2017-12-02 Thread cdouglas
HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat 
interval. Contributed by Lukas Majercak

(cherry picked from commit 42307e3c3abbfe0b83d9a2581deba327435b910f)
(cherry picked from commit 2399c96ee7395bed8318820018d5285d460ac9bc)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0d41249b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0d41249b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0d41249b

Branch: refs/heads/branch-3.0.0
Commit: 0d41249bec2a95ad13ba8065de3921967dcc0ba4
Parents: 419a83e
Author: Chris Douglas 
Authored: Fri Dec 1 22:34:30 2017 -0800
Committer: Chris Douglas 
Committed: Sat Dec 2 23:32:59 2017 -0800

--
 .../apache/hadoop/test/GenericTestUtils.java    |  10 +-
 .../server/blockmanagement/BlockManager.java    |  40 ++
 .../blockmanagement/PendingRecoveryBlocks.java  | 143 +++
 .../hdfs/server/namenode/FSNamesystem.java      |  40 +++---
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  |  10 ++
 .../TestPendingRecoveryBlocks.java              |  87 +++
 .../hdfs/server/datanode/TestBlockRecovery.java | 107 ++
 .../namenode/ha/TestPipelinesFailover.java      |   4 +-
 8 files changed, 421 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0d41249b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
--
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
index 4cb9f8b..72c8d41 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
@@ -624,10 +624,16 @@ public abstract class GenericTestUtils {
* conditions.
*/
   public static class SleepAnswer implements Answer {
+private final int minSleepTime;
 private final int maxSleepTime;
 private static Random r = new Random();
-
+
 public SleepAnswer(int maxSleepTime) {
+  this(0, maxSleepTime);
+}
+
+public SleepAnswer(int minSleepTime, int maxSleepTime) {
+  this.minSleepTime = minSleepTime;
   this.maxSleepTime = maxSleepTime;
 }
 
@@ -635,7 +641,7 @@ public abstract class GenericTestUtils {
 public Object answer(InvocationOnMock invocation) throws Throwable {
   boolean interrupted = false;
   try {
-Thread.sleep(r.nextInt(maxSleepTime));
+Thread.sleep(r.nextInt(maxSleepTime) + minSleepTime);
   } catch (InterruptedException ie) {
 interrupted = true;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0d41249b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index bdabd81..f440c73 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -164,6 +164,8 @@ public class BlockManager implements BlockStatsMXBean {
   private static final String QUEUE_REASON_FUTURE_GENSTAMP =
 "generation stamp is in the future";
 
+  private static final long BLOCK_RECOVERY_TIMEOUT_MULTIPLIER = 30;
+
   private final Namesystem namesystem;
 
   private final BlockManagerSafeMode bmSafeMode;
@@ -353,6 +355,9 @@ public class BlockManager implements BlockStatsMXBean {
   @VisibleForTesting
   final PendingReconstructionBlocks pendingReconstruction;
 
+  /** Stores information about block recovery attempts. */
+  private final PendingRecoveryBlocks pendingRecoveryBlocks;
+
   /** The maximum number of replicas allowed for a block */
   public final short maxReplication;
   /**
@@ -549,6 +554,12 @@ public class BlockManager implements BlockStatsMXBean {
 }
 this.minReplicationToBeInMaintenance = (short)minMaintenanceR;
 
+long heartbeatIntervalSecs = conf.getTimeDuration(
+DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
+DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS);
+long blockRecoveryTimeout = getBlockRecoveryTimeout(heartbeatIntervalSecs);
+pendingRecoveryBlocks = new 

[3/3] hadoop git commit: HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak

2017-12-01 Thread cdouglas
HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat 
interval. Contributed by Lukas Majercak

(cherry picked from commit 5304698dc8c5667c33e6ed9c4a827ef57172a723)
(cherry picked from commit 482fd5a880994f37fc3ad9e0cc2d127737b70aef)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/be664bd6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/be664bd6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/be664bd6

Branch: refs/heads/branch-3.0.0
Commit: be664bd64d0dba3356f44605f870e988abfec932
Parents: 7dd435c
Author: Chris Douglas 
Authored: Fri Dec 1 10:29:30 2017 -0800
Committer: Chris Douglas 
Committed: Fri Dec 1 10:35:31 2017 -0800

--
 .../apache/hadoop/test/GenericTestUtils.java    |  10 +-
 .../server/blockmanagement/BlockManager.java    |  40 ++
 .../blockmanagement/PendingRecoveryBlocks.java  | 143 +++
 .../hdfs/server/namenode/FSNamesystem.java      |  40 +++---
 .../TestPendingRecoveryBlocks.java              |  87 +++
 .../hdfs/server/datanode/TestBlockRecovery.java | 108 ++
 .../namenode/ha/TestPipelinesFailover.java      |   5 +-
 7 files changed, 413 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/be664bd6/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
--
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
index 4cb9f8b..72c8d41 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
@@ -624,10 +624,16 @@ public abstract class GenericTestUtils {
* conditions.
*/
   public static class SleepAnswer implements Answer {
+private final int minSleepTime;
 private final int maxSleepTime;
 private static Random r = new Random();
-
+
 public SleepAnswer(int maxSleepTime) {
+  this(0, maxSleepTime);
+}
+
+public SleepAnswer(int minSleepTime, int maxSleepTime) {
+  this.minSleepTime = minSleepTime;
   this.maxSleepTime = maxSleepTime;
 }
 
@@ -635,7 +641,7 @@ public abstract class GenericTestUtils {
 public Object answer(InvocationOnMock invocation) throws Throwable {
   boolean interrupted = false;
   try {
-Thread.sleep(r.nextInt(maxSleepTime));
+Thread.sleep(r.nextInt(maxSleepTime) + minSleepTime);
   } catch (InterruptedException ie) {
 interrupted = true;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/be664bd6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index bdabd81..f440c73 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -164,6 +164,8 @@ public class BlockManager implements BlockStatsMXBean {
   private static final String QUEUE_REASON_FUTURE_GENSTAMP =
 "generation stamp is in the future";
 
+  private static final long BLOCK_RECOVERY_TIMEOUT_MULTIPLIER = 30;
+
   private final Namesystem namesystem;
 
   private final BlockManagerSafeMode bmSafeMode;
@@ -353,6 +355,9 @@ public class BlockManager implements BlockStatsMXBean {
   @VisibleForTesting
   final PendingReconstructionBlocks pendingReconstruction;
 
+  /** Stores information about block recovery attempts. */
+  private final PendingRecoveryBlocks pendingRecoveryBlocks;
+
   /** The maximum number of replicas allowed for a block */
   public final short maxReplication;
   /**
@@ -549,6 +554,12 @@ public class BlockManager implements BlockStatsMXBean {
 }
 this.minReplicationToBeInMaintenance = (short)minMaintenanceR;
 
+long heartbeatIntervalSecs = conf.getTimeDuration(
+DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
+DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS);
+long blockRecoveryTimeout = getBlockRecoveryTimeout(heartbeatIntervalSecs);
+pendingRecoveryBlocks = new PendingRecoveryBlocks(blockRecoveryTimeout);
+