HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move progress. Contributed by Surendra Singh Lilhore
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9ef03a4c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9ef03a4c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9ef03a4c

Branch: refs/heads/YARN-1197
Commit: 9ef03a4c5bb5573eadc7d04e371c4af2dc6bae37
Parents: f7c8311
Author: Tsz-Wo Nicholas Sze <szets...@hortonworks.com>
Authored: Mon Jul 13 15:12:26 2015 -0700
Committer: Tsz-Wo Nicholas Sze <szets...@hortonworks.com>
Committed: Mon Jul 13 15:12:26 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +++
 .../hadoop/hdfs/server/balancer/Dispatcher.java | 18 +++++++++++++
 .../apache/hadoop/hdfs/server/mover/Mover.java  | 27 +++++++++++++++-----
 .../hadoop/hdfs/server/mover/TestMover.java     |  2 +-
 4 files changed, 43 insertions(+), 7 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1491990..e843dcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -716,6 +716,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8751. Remove setBlocks API from INodeFile and misc code cleanup.
     (Zhe Zhang via jing9)
 
+    HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move
+    progress. (Surendra Singh Lilhore via szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-8026.
Trace FSOutputSummer#writeChecksumChunks rather than

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
index 4a8f40f..298b86d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
@@ -317,6 +317,7 @@ public class Dispatcher {
         sendRequest(out, eb, accessToken);
         receiveResponse(in);
         nnc.getBytesMoved().addAndGet(block.getNumBytes());
+        target.getDDatanode().setHasSuccess();
         LOG.info("Successfully moved " + this);
       } catch (IOException e) {
         LOG.warn("Failed to move " + this + ": " + e.getMessage());
@@ -500,6 +501,7 @@ public class Dispatcher {
     /** blocks being moved but not confirmed yet */
     private final List<PendingMove> pendings;
     private volatile boolean hasFailure = false;
+    private volatile boolean hasSuccess = false;
     private final int maxConcurrentMoves;
 
     @Override
@@ -573,6 +575,10 @@ public class Dispatcher {
     void setHasFailure() {
       this.hasFailure = true;
     }
+
+    void setHasSuccess() {
+      this.hasSuccess = true;
+    }
   }
 
   /** A node that can be the sources of a block move */
@@ -965,6 +971,18 @@ public class Dispatcher {
   }
 
   /**
+   * @return true if some moves are success.
+   */
+  public static boolean checkForSuccess(
+      Iterable<? extends StorageGroup> targets) {
+    boolean hasSuccess = false;
+    for (StorageGroup t : targets) {
+      hasSuccess |= t.getDDatanode().hasSuccess;
+    }
+    return hasSuccess;
+  }
+
+  /**
    * Decide if the block is a good candidate to be moved from source to target.
   * A block is a good candidate if
   * 1. the block is not in the process of being moved/has not been moved;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
index 344b9fc..afacebb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
@@ -269,10 +269,14 @@ public class Mover {
       // wait for pending move to finish and retry the failed migration
       boolean hasFailed = Dispatcher.waitForMoveCompletion(storages.targets
           .values());
-      if (hasFailed) {
+      boolean hasSuccess = Dispatcher.checkForSuccess(storages.targets
+          .values());
+      if (hasFailed && !hasSuccess) {
         if (retryCount.get() == retryMaxAttempts) {
-          throw new IOException("Failed to move some block's after "
+          result.setRetryFailed();
+          LOG.error("Failed to move some block's after "
               + retryMaxAttempts + " retries.");
+          return result;
         } else {
           retryCount.incrementAndGet();
         }
@@ -713,10 +717,12 @@ public class Mover {
 
     private boolean hasRemaining;
     private boolean noBlockMoved;
+    private boolean retryFailed;
 
     Result() {
       hasRemaining = false;
       noBlockMoved = true;
+      retryFailed = false;
     }
 
     boolean isHasRemaining() {
@@ -735,16 +741,25 @@ public class Mover {
       this.noBlockMoved = noBlockMoved;
     }
 
+    void setRetryFailed() {
+      this.retryFailed = true;
+    }
+
     /**
-     * @return SUCCESS if all moves are success and there is no remaining move.
+     * @return NO_MOVE_PROGRESS if no progress in move after some retry. Return
+     *         SUCCESS if all moves are success and there is no remaining move.
     *         Return NO_MOVE_BLOCK if there moves available but all the moves
     *         cannot be scheduled. Otherwise, return IN_PROGRESS since there
     *         must be some remaining moves.
     */
    ExitStatus getExitStatus() {
-      return !isHasRemaining() ? ExitStatus.SUCCESS
-          : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK
-              : ExitStatus.IN_PROGRESS;
+      if (retryFailed) {
+        return ExitStatus.NO_MOVE_PROGRESS;
+      } else {
+        return !isHasRemaining() ? ExitStatus.SUCCESS
+            : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK
+                : ExitStatus.IN_PROGRESS;
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
index 899b5c0..d3d814c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
@@ -404,7 +404,7 @@ public class TestMover {
       int rc = ToolRunner.run(conf, new Mover.Cli(),
           new String[] {"-p", file.toString()});
       Assert.assertEquals("Movement should fail after some retry",
-          ExitStatus.IO_EXCEPTION.getExitCode(), rc);
+          ExitStatus.NO_MOVE_PROGRESS.getExitCode(), rc);
     } finally {
       cluster.shutdown();
     }