2005hithlj commented on code in PR #5996:
URL: https://github.com/apache/hbase/pull/5996#discussion_r1671494903


##########
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java:
##########
@@ -547,20 +559,64 @@ private void verifyCopyResult(final FileStatus inputStat, 
final FileStatus outpu
 
       // If length==0, we will skip checksum
       if (inputLen != 0 && verifyChecksum) {
-        FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
-        if (inChecksum == null) {
-          LOG.warn("Input file " + inputPath + " checksums are not available");
-        }
-        FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
-        if (outChecksum == null) {
-          LOG.warn("Output file " + outputPath + " checksums are not 
available");
-        }
-        if (inChecksum != null && outChecksum != null && 
!inChecksum.equals(outChecksum)) {
-          throw new IOException("Checksum mismatch between " + inputPath + " 
and " + outputPath);
+        FileChecksum inChecksum = getFileChecksum(inputFs, 
inputStat.getPath());
+        FileChecksum outChecksum = getFileChecksum(outputFs, 
outputStat.getPath());
+
+        ChecksumComparison checksumComparison = verifyChecksum(inChecksum, 
outChecksum);
+        if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
+          StringBuilder errMessage = new StringBuilder("Checksum mismatch 
between ")
+            .append(inputPath).append(" and ").append(outputPath).append(".");
+
+          boolean addSkipHint = false;
+          String inputScheme = inputFs.getScheme();
+          String outputScheme = outputFs.getScheme();
+          if (!inputScheme.equals(outputScheme)) {
+            errMessage.append(" Input and output filesystems are of different 
types.\n")
+              .append("Their checksum algorithms may be incompatible.");
+            addSkipHint = true;
+          } else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
+            errMessage.append(" Input and output differ in block-size.");
+            addSkipHint = true;
+          } else if (
+            inChecksum != null && outChecksum != null
+              && 
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+          ) {
+            errMessage.append(" Input and output checksum algorithms are of 
different types.");
+            addSkipHint = true;
+          }
+          if (addSkipHint) {
+            errMessage
+              .append(" You can choose file-level checksum validation via "
+                + "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+                + " or filesystems are different.")
+              .append(" Or you can skip checksum-checks altogether with 
--no-checksum-verify.\n")
+              .append(" (NOTE: By skipping checksums, one runs the risk of "
+                + "masking data-corruption during file-transfer.)\n");
+          }
+          throw new IOException(errMessage.toString());
         }
       }
     }
 
+    /**
+     * Utility to compare checksums
+     */
+    private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
+      final FileChecksum outChecksum) {
+      // If the input or output checksum is null, or the algorithms of input 
and output are not
+      // equal, that means there is no comparison
+      // and return not compatible. else if matched, return compatible with 
the matched result.
+      if (
+        inChecksum == null || outChecksum == null
+          || 
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+      ) {

Review Comment:
   @guluo2016 Thank you for the review. For cases where 
LocalFileSystem.getFileChecksum returns null, you can specify 
--no-checksum-verify to skip checksum verification.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to