[1/2] hadoop git commit: HADOOP-15273.distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran.

2018-03-09 Thread stevel
Repository: hadoop
Updated Branches:
  refs/heads/branch-3.0 f879504fe -> ca5c4d454


HADOOP-15273. distcp can't handle remote stores with different checksum
algorithms.
Contributed by Steve Loughran.

(cherry picked from commit 7ef4d942dd96232b0743a40ed25f77065254f94d)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1771af23
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1771af23
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1771af23

Branch: refs/heads/branch-3.0
Commit: 1771af2320b9627c37aae7593e14a850d4935115
Parents: f879504
Author: Steve Loughran 
Authored: Thu Mar 8 11:24:06 2018 +0000
Committer: Steve Loughran 
Committed: Fri Mar 9 10:49:11 2018 +0000

--
 .../org/apache/hadoop/tools/DistCpOptions.java  |  5 
 .../tools/mapred/RetriableFileCopyCommand.java  | 29 +++-
 .../hadoop/tools/mapred/TestCopyMapper.java | 14 +-
 3 files changed, 29 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1771af23/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
--
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
index ece1a94..f33f7fd 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
@@ -534,11 +534,6 @@ public final class DistCpOptions {
 + "mutually exclusive");
   }
 
-  if (!syncFolder && skipCRC) {
-throw new IllegalArgumentException(
-"Skip CRC is valid only with update options");
-  }
-
   if (!syncFolder && append) {
 throw new IllegalArgumentException(
 "Append is valid only with update options");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1771af23/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
--
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index 21f621a..1eabf7f 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   throws IOException {
 if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
 targetFS, target)) {
-  StringBuilder errorMessage = new StringBuilder("Check-sum mismatch 
between ")
-  .append(source).append(" and ").append(target).append(".");
-  if (sourceFS.getFileStatus(source).getBlockSize() !=
+  StringBuilder errorMessage =
+  new StringBuilder("Checksum mismatch between ")
+  .append(source).append(" and ").append(target).append(".");
+  boolean addSkipHint = false;
+  String srcScheme = sourceFS.getScheme();
+  String targetScheme = targetFS.getScheme();
+  if (!srcScheme.equals(targetScheme)
+  && !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
+// the filesystems are different and they aren't both hdfs connectors
+errorMessage.append("Source and destination filesystems are of"
++ " different types\n")
+.append("Their checksum algorithms may be incompatible");
+addSkipHint = true;
+  } else if (sourceFS.getFileStatus(source).getBlockSize() !=
   targetFS.getFileStatus(target).getBlockSize()) {
-errorMessage.append(" Source and target differ in block-size.")
-.append(" Use -pb to preserve block-sizes during copy.")
-.append(" Alternatively, skip checksum-checks altogether, using 
-skipCrc.")
+errorMessage.append(" Source and target differ in block-size.\n")
+.append(" Use -pb to preserve block-sizes during copy.");
+addSkipHint = true;
+  }
+  if (addSkipHint) {
+errorMessage.append(" You can skip checksum-checks altogether "
++ " with -skipcrccheck.\n")
 .append(" (NOTE: By skipping checksums, one runs the risk of " +
-"masking data-corruption during file-transfer.)");
+"masking data-corruption during file-transfer.)\n");
   }
   throw new 

[1/2] hadoop git commit: HADOOP-15273.distcp can't handle remote stores with different checksum algorithms. Contributed by Steve Loughran.

2018-03-08 Thread stevel
Repository: hadoop
Updated Branches:
  refs/heads/branch-3.1 f0b486f6a -> ba0184376
  refs/heads/trunk 3bd6b1fd8 -> 7ef4d942d


HADOOP-15273. distcp can't handle remote stores with different checksum
algorithms.
Contributed by Steve Loughran.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7ef4d942
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7ef4d942
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7ef4d942

Branch: refs/heads/trunk
Commit: 7ef4d942dd96232b0743a40ed25f77065254f94d
Parents: 3bd6b1f
Author: Steve Loughran 
Authored: Thu Mar 8 11:24:06 2018 +0000
Committer: Steve Loughran 
Committed: Thu Mar 8 11:24:06 2018 +0000

--
 .../org/apache/hadoop/tools/DistCpOptions.java  |  5 
 .../tools/mapred/RetriableFileCopyCommand.java  | 29 +++-
 .../hadoop/tools/mapred/TestCopyMapper.java | 14 +-
 3 files changed, 29 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ef4d942/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
--
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
index ece1a94..f33f7fd 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
@@ -534,11 +534,6 @@ public final class DistCpOptions {
 + "mutually exclusive");
   }
 
-  if (!syncFolder && skipCRC) {
-throw new IllegalArgumentException(
-"Skip CRC is valid only with update options");
-  }
-
   if (!syncFolder && append) {
 throw new IllegalArgumentException(
 "Append is valid only with update options");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ef4d942/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
--
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index 0311061..55f90d0 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   throws IOException {
 if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
 targetFS, target)) {
-  StringBuilder errorMessage = new StringBuilder("Check-sum mismatch 
between ")
-  .append(source).append(" and ").append(target).append(".");
-  if (sourceFS.getFileStatus(source).getBlockSize() !=
+  StringBuilder errorMessage =
+  new StringBuilder("Checksum mismatch between ")
+  .append(source).append(" and ").append(target).append(".");
+  boolean addSkipHint = false;
+  String srcScheme = sourceFS.getScheme();
+  String targetScheme = targetFS.getScheme();
+  if (!srcScheme.equals(targetScheme)
+  && !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
+// the filesystems are different and they aren't both hdfs connectors
+errorMessage.append("Source and destination filesystems are of"
++ " different types\n")
+.append("Their checksum algorithms may be incompatible");
+addSkipHint = true;
+  } else if (sourceFS.getFileStatus(source).getBlockSize() !=
   targetFS.getFileStatus(target).getBlockSize()) {
-errorMessage.append(" Source and target differ in block-size.")
-.append(" Use -pb to preserve block-sizes during copy.")
-.append(" Alternatively, skip checksum-checks altogether, using 
-skipCrc.")
+errorMessage.append(" Source and target differ in block-size.\n")
+.append(" Use -pb to preserve block-sizes during copy.");
+addSkipHint = true;
+  }
+  if (addSkipHint) {
+errorMessage.append(" You can skip checksum-checks altogether "
++ " with -skipcrccheck.\n")
 .append(" (NOTE: By skipping checksums, one runs the risk of " +
-"masking data-corruption during file-transfer.)");
+"masking data-corruption during file-transfer.)\n");
   }
   throw new IOException(errorMessage.toString());