Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 d4a462c02 -> 9e11ae684
HDFS-8151. Always use snapshot path as source when invalid snapshot names are used for diff based distcp. Contributed by Jing Zhao. (cherry picked from commit 4c097e473bb1f18d1510deb61bae2bcb8c156f18) (cherry picked from commit d4dd97eabd6691eedeeb9fb7685060dfb192ff21) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9e11ae68 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9e11ae68 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9e11ae68 Branch: refs/heads/branch-2.7 Commit: 9e11ae684a520ade428cf8eb31db0ad821be73f3 Parents: d4a462c Author: Jing Zhao <ji...@apache.org> Authored: Wed Apr 15 12:37:20 2015 -0700 Committer: Jing Zhao <ji...@apache.org> Committed: Wed Apr 15 12:41:34 2015 -0700 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/tools/DistCpSync.java | 12 +++++++----- .../java/org/apache/hadoop/tools/TestDistCpSync.java | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e11ae68/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7c9225a..fda744b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -20,6 +20,9 @@ Release 2.7.1 - UNRELEASED HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to finalize upgrade. (jing9) + HDFS-8151. Always use snapshot path as source when invalid snapshot names + are used for diff based distcp. 
(jing9) + Release 2.7.0 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e11ae68/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 8e71b6f..5bf638d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -47,8 +47,8 @@ class DistCpSync { List<Path> sourcePaths = inputOptions.getSourcePaths(); if (sourcePaths.size() != 1) { // we only support one source dir which must be a snapshottable directory - DistCp.LOG.warn(sourcePaths.size() + " source paths are provided"); - return false; + throw new IllegalArgumentException(sourcePaths.size() + + " source paths are provided"); } final Path sourceDir = sourcePaths.get(0); final Path targetDir = inputOptions.getTargetPath(); @@ -59,15 +59,17 @@ class DistCpSync { // DistributedFileSystem. 
if (!(sfs instanceof DistributedFileSystem) || !(tfs instanceof DistributedFileSystem)) { - DistCp.LOG.warn("To use diff-based distcp, the FileSystems needs to" + - " be DistributedFileSystem"); - return false; + throw new IllegalArgumentException("The FileSystems needs to" + + " be DistributedFileSystem for using snapshot-diff-based distcp"); } final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs; final DistributedFileSystem targetFs= (DistributedFileSystem) tfs; // make sure targetFS has no change between from and the current states if (!checkNoChange(inputOptions, targetFs, targetDir)) { + // set the source path using the snapshot path + inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir, + inputOptions.getToSnapshot()))); return false; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e11ae68/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 75d1de5..0a9a11f 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -88,24 +88,37 @@ public class TestDistCpSync { public void testFallback() throws Exception { // the source/target dir are not snapshottable dir Assert.assertFalse(DistCpSync.sync(options, conf)); + // make sure the source path has been updated to the snapshot path + final Path spath = new Path(source, + HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source path in options + options.setSourcePaths(Arrays.asList(source)); // the source/target does not have the given snapshots dfs.allowSnapshot(source); 
dfs.allowSnapshot(target); Assert.assertFalse(DistCpSync.sync(options, conf)); + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source path in options + options.setSourcePaths(Arrays.asList(source)); dfs.createSnapshot(source, "s1"); dfs.createSnapshot(source, "s2"); dfs.createSnapshot(target, "s1"); Assert.assertTrue(DistCpSync.sync(options, conf)); + // reset source paths in options options.setSourcePaths(Arrays.asList(source)); - // changes have been made in target final Path subTarget = new Path(target, "sub"); dfs.mkdirs(subTarget); Assert.assertFalse(DistCpSync.sync(options, conf)); + // make sure the source path has been updated to the snapshot path + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source paths in options + options.setSourcePaths(Arrays.asList(source)); dfs.delete(subTarget, true); Assert.assertTrue(DistCpSync.sync(options, conf)); }