[3/3] hadoop git commit: HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak
HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak (cherry picked from commit 42307e3c3abbfe0b83d9a2581deba327435b910f) (cherry picked from commit 2399c96ee7395bed8318820018d5285d460ac9bc) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0d41249b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0d41249b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0d41249b Branch: refs/heads/branch-3.0.0 Commit: 0d41249bec2a95ad13ba8065de3921967dcc0ba4 Parents: 419a83e Author: Chris Douglas Authored: Fri Dec 1 22:34:30 2017 -0800 Committer: Chris Douglas Committed: Sat Dec 2 23:32:59 2017 -0800 -- .../apache/hadoop/test/GenericTestUtils.java| 10 +- .../server/blockmanagement/BlockManager.java| 40 ++ .../blockmanagement/PendingRecoveryBlocks.java | 143 +++ .../hdfs/server/namenode/FSNamesystem.java | 40 +++--- .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 10 ++ .../TestPendingRecoveryBlocks.java | 87 +++ .../hdfs/server/datanode/TestBlockRecovery.java | 107 ++ .../namenode/ha/TestPipelinesFailover.java | 4 +- 8 files changed, 421 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0d41249b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java -- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 4cb9f8b..72c8d41 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -624,10 +624,16 @@ public abstract class GenericTestUtils { * conditions. 
*/ public static class SleepAnswer implements Answer { +private final int minSleepTime; private final int maxSleepTime; private static Random r = new Random(); - + public SleepAnswer(int maxSleepTime) { + this(0, maxSleepTime); +} + +public SleepAnswer(int minSleepTime, int maxSleepTime) { + this.minSleepTime = minSleepTime; this.maxSleepTime = maxSleepTime; } @@ -635,7 +641,7 @@ public abstract class GenericTestUtils { public Object answer(InvocationOnMock invocation) throws Throwable { boolean interrupted = false; try { -Thread.sleep(r.nextInt(maxSleepTime)); +Thread.sleep(r.nextInt(maxSleepTime) + minSleepTime); } catch (InterruptedException ie) { interrupted = true; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/0d41249b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index bdabd81..f440c73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -164,6 +164,8 @@ public class BlockManager implements BlockStatsMXBean { private static final String QUEUE_REASON_FUTURE_GENSTAMP = "generation stamp is in the future"; + private static final long BLOCK_RECOVERY_TIMEOUT_MULTIPLIER = 30; + private final Namesystem namesystem; private final BlockManagerSafeMode bmSafeMode; @@ -353,6 +355,9 @@ public class BlockManager implements BlockStatsMXBean { @VisibleForTesting final PendingReconstructionBlocks pendingReconstruction; + /** Stores information about block recovery attempts. 
*/ + private final PendingRecoveryBlocks pendingRecoveryBlocks; + /** The maximum number of replicas allowed for a block */ public final short maxReplication; /** @@ -549,6 +554,12 @@ public class BlockManager implements BlockStatsMXBean { } this.minReplicationToBeInMaintenance = (short)minMaintenanceR; +long heartbeatIntervalSecs = conf.getTimeDuration( +DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, +DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS); +long blockRecoveryTimeout = getBlockRecoveryTimeout(heartbeatIntervalSecs); +pendingRecoveryBlocks = new
[3/3] hadoop git commit: HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak
HDFS-11576. Block recovery will fail indefinitely if recovery time > heartbeat interval. Contributed by Lukas Majercak (cherry picked from commit 5304698dc8c5667c33e6ed9c4a827ef57172a723) (cherry picked from commit 482fd5a880994f37fc3ad9e0cc2d127737b70aef) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/be664bd6 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/be664bd6 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/be664bd6 Branch: refs/heads/branch-3.0.0 Commit: be664bd64d0dba3356f44605f870e988abfec932 Parents: 7dd435c Author: Chris Douglas Authored: Fri Dec 1 10:29:30 2017 -0800 Committer: Chris Douglas Committed: Fri Dec 1 10:35:31 2017 -0800 -- .../apache/hadoop/test/GenericTestUtils.java| 10 +- .../server/blockmanagement/BlockManager.java| 40 ++ .../blockmanagement/PendingRecoveryBlocks.java | 143 +++ .../hdfs/server/namenode/FSNamesystem.java | 40 +++--- .../TestPendingRecoveryBlocks.java | 87 +++ .../hdfs/server/datanode/TestBlockRecovery.java | 108 ++ .../namenode/ha/TestPipelinesFailover.java | 5 +- 7 files changed, 413 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/be664bd6/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java -- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 4cb9f8b..72c8d41 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -624,10 +624,16 @@ public abstract class GenericTestUtils { * conditions. 
*/ public static class SleepAnswer implements Answer { +private final int minSleepTime; private final int maxSleepTime; private static Random r = new Random(); - + public SleepAnswer(int maxSleepTime) { + this(0, maxSleepTime); +} + +public SleepAnswer(int minSleepTime, int maxSleepTime) { + this.minSleepTime = minSleepTime; this.maxSleepTime = maxSleepTime; } @@ -635,7 +641,7 @@ public abstract class GenericTestUtils { public Object answer(InvocationOnMock invocation) throws Throwable { boolean interrupted = false; try { -Thread.sleep(r.nextInt(maxSleepTime)); +Thread.sleep(r.nextInt(maxSleepTime) + minSleepTime); } catch (InterruptedException ie) { interrupted = true; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/be664bd6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index bdabd81..f440c73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -164,6 +164,8 @@ public class BlockManager implements BlockStatsMXBean { private static final String QUEUE_REASON_FUTURE_GENSTAMP = "generation stamp is in the future"; + private static final long BLOCK_RECOVERY_TIMEOUT_MULTIPLIER = 30; + private final Namesystem namesystem; private final BlockManagerSafeMode bmSafeMode; @@ -353,6 +355,9 @@ public class BlockManager implements BlockStatsMXBean { @VisibleForTesting final PendingReconstructionBlocks pendingReconstruction; + /** Stores information about block recovery attempts. 
*/ + private final PendingRecoveryBlocks pendingRecoveryBlocks; + /** The maximum number of replicas allowed for a block */ public final short maxReplication; /** @@ -549,6 +554,12 @@ public class BlockManager implements BlockStatsMXBean { } this.minReplicationToBeInMaintenance = (short)minMaintenanceR; +long heartbeatIntervalSecs = conf.getTimeDuration( +DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, +DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS); +long blockRecoveryTimeout = getBlockRecoveryTimeout(heartbeatIntervalSecs); +pendingRecoveryBlocks = new PendingRecoveryBlocks(blockRecoveryTimeout); +