guluo2016 commented on code in PR #5996:
URL: https://github.com/apache/hbase/pull/5996#discussion_r1664973473
##########
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java:
##########
@@ -547,20 +559,64 @@ private void verifyCopyResult(final FileStatus inputStat,
final FileStatus outpu
// If length==0, we will skip checksum
if (inputLen != 0 && verifyChecksum) {
- FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
- if (inChecksum == null) {
- LOG.warn("Input file " + inputPath + " checksums are not available");
- }
- FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
- if (outChecksum == null) {
- LOG.warn("Output file " + outputPath + " checksums are not
available");
- }
- if (inChecksum != null && outChecksum != null &&
!inChecksum.equals(outChecksum)) {
- throw new IOException("Checksum mismatch between " + inputPath + "
and " + outputPath);
+ FileChecksum inChecksum = getFileChecksum(inputFs,
inputStat.getPath());
+ FileChecksum outChecksum = getFileChecksum(outputFs,
outputStat.getPath());
+
+ ChecksumComparison checksumComparison = verifyChecksum(inChecksum,
outChecksum);
+ if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
+ StringBuilder errMessage = new StringBuilder("Checksum mismatch
between ")
+ .append(inputPath).append(" and ").append(outputPath).append(".");
+
+ boolean addSkipHint = false;
+ String inputScheme = inputFs.getScheme();
+ String outputScheme = outputFs.getScheme();
+ if (!inputScheme.equals(outputScheme)) {
+ errMessage.append(" Input and output filesystems are of different
types.\n")
+ .append("Their checksum algorithms may be incompatible.");
+ addSkipHint = true;
+ } else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
+ errMessage.append(" Input and output differ in block-size.");
+ addSkipHint = true;
+ } else if (
+ inChecksum != null && outChecksum != null
+ &&
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+ ) {
+ errMessage.append(" Input and output checksum algorithms are of
different types.");
+ addSkipHint = true;
+ }
+ if (addSkipHint) {
+ errMessage
+ .append(" You can choose file-level checksum validation via "
+ + "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ + " or filesystems are different.")
+ .append(" Or you can skip checksum-checks altogether with
--no-checksum-verify.\n")
+ .append(" (NOTE: By skipping checksums, one runs the risk of "
+ + "masking data-corruption during file-transfer.)\n");
+ }
+ throw new IOException(errMessage.toString());
}
}
}
+ /**
+ * Utility to compare checksums
+ */
+ private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
+ final FileChecksum outChecksum) {
+ // If the input or output checksum is null, or the algorithms of input
and output are not
+ // equal, that means there is no comparison
+ // and return not compatible. else if matched, return compatible with
the matched result.
+ if (
+ inChecksum == null || outChecksum == null
+ ||
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+ ) {
Review Comment:
@2005hithlj
I'm sorry for my late reply, as I just discovered this issue.
For LocalFileSystem, since it does not override the getFileChecksum method,
LocalFileSystem.getFileChecksum always returns null.
Therefore, if we start a standalone HBase (e.g., set hbase.rootdir to
file:///tmp/hbase), executing ExportSnapshot will fail because the checksum
verification fails here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]