[1/2] hadoop git commit: HADOOP-15273. distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 f879504fe -> ca5c4d454 HADOOP-15273.distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran. (cherry picked from commit 7ef4d942dd96232b0743a40ed25f77065254f94d) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1771af23 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1771af23 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1771af23 Branch: refs/heads/branch-3.0 Commit: 1771af2320b9627c37aae7593e14a850d4935115 Parents: f879504 Author: Steve LoughranAuthored: Thu Mar 8 11:24:06 2018 + Committer: Steve Loughran Committed: Fri Mar 9 10:49:11 2018 + -- .../org/apache/hadoop/tools/DistCpOptions.java | 5 .../tools/mapred/RetriableFileCopyCommand.java | 29 +++- .../hadoop/tools/mapred/TestCopyMapper.java | 14 +- 3 files changed, 29 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1771af23/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java -- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index ece1a94..f33f7fd 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -534,11 +534,6 @@ public final class DistCpOptions { + "mutually exclusive"); } - if (!syncFolder && skipCRC) { -throw new IllegalArgumentException( -"Skip CRC is valid only with update options"); - } - if (!syncFolder && append) { throw new IllegalArgumentException( "Append is valid only with update options"); http://git-wip-us.apache.org/repos/asf/hadoop/blob/1771af23/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java -- diff --git 
a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java index 21f621a..1eabf7f 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java @@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand { throws IOException { if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum, targetFS, target)) { - StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ") - .append(source).append(" and ").append(target).append("."); - if (sourceFS.getFileStatus(source).getBlockSize() != + StringBuilder errorMessage = + new StringBuilder("Checksum mismatch between ") + .append(source).append(" and ").append(target).append("."); + boolean addSkipHint = false; + String srcScheme = sourceFS.getScheme(); + String targetScheme = targetFS.getScheme(); + if (!srcScheme.equals(targetScheme) + && !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) { +// the filesystems are different and they aren't both hdfs connectors +errorMessage.append("Source and destination filesystems are of" ++ " different types\n") +.append("Their checksum algorithms may be incompatible"); +addSkipHint = true; + } else if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) { -errorMessage.append(" Source and target differ in block-size.") -.append(" Use -pb to preserve block-sizes during copy.") -.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.") +errorMessage.append(" Source and target differ in block-size.\n") +.append(" Use -pb to preserve block-sizes during copy."); +addSkipHint = true; + } + if (addSkipHint) { +errorMessage.append(" You can skip checksum-checks 
altogether " ++ " with -skipcrccheck.\n") .append(" (NOTE: By skipping checksums, one runs the risk of " + -"masking data-corruption during file-transfer.)"); +"masking data-corruption during file-transfer.)\n"); } throw new
[1/2] hadoop git commit: HADOOP-15273. distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran.
Repository: hadoop Updated Branches: refs/heads/branch-3.1 f0b486f6a -> ba0184376 refs/heads/trunk 3bd6b1fd8 -> 7ef4d942d HADOOP-15273.distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7ef4d942 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7ef4d942 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7ef4d942 Branch: refs/heads/trunk Commit: 7ef4d942dd96232b0743a40ed25f77065254f94d Parents: 3bd6b1f Author: Steve LoughranAuthored: Thu Mar 8 11:24:06 2018 + Committer: Steve Loughran Committed: Thu Mar 8 11:24:06 2018 + -- .../org/apache/hadoop/tools/DistCpOptions.java | 5 .../tools/mapred/RetriableFileCopyCommand.java | 29 +++- .../hadoop/tools/mapred/TestCopyMapper.java | 14 +- 3 files changed, 29 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ef4d942/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java -- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index ece1a94..f33f7fd 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -534,11 +534,6 @@ public final class DistCpOptions { + "mutually exclusive"); } - if (!syncFolder && skipCRC) { -throw new IllegalArgumentException( -"Skip CRC is valid only with update options"); - } - if (!syncFolder && append) { throw new IllegalArgumentException( "Append is valid only with update options"); http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ef4d942/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java -- diff --git 
a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java index 0311061..55f90d0 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java @@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand { throws IOException { if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum, targetFS, target)) { - StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ") - .append(source).append(" and ").append(target).append("."); - if (sourceFS.getFileStatus(source).getBlockSize() != + StringBuilder errorMessage = + new StringBuilder("Checksum mismatch between ") + .append(source).append(" and ").append(target).append("."); + boolean addSkipHint = false; + String srcScheme = sourceFS.getScheme(); + String targetScheme = targetFS.getScheme(); + if (!srcScheme.equals(targetScheme) + && !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) { +// the filesystems are different and they aren't both hdfs connectors +errorMessage.append("Source and destination filesystems are of" ++ " different types\n") +.append("Their checksum algorithms may be incompatible"); +addSkipHint = true; + } else if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) { -errorMessage.append(" Source and target differ in block-size.") -.append(" Use -pb to preserve block-sizes during copy.") -.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.") +errorMessage.append(" Source and target differ in block-size.\n") +.append(" Use -pb to preserve block-sizes during copy."); +addSkipHint = true; + } + if (addSkipHint) { +errorMessage.append(" You can skip checksum-checks 
altogether " ++ " with -skipcrccheck.\n") .append(" (NOTE: By skipping checksums, one runs the risk of " + -"masking data-corruption during file-transfer.)"); +"masking data-corruption during file-transfer.)\n"); } throw new IOException(errorMessage.toString());