This is an automated email from the ASF dual-hosted git repository.
weichiu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new dd694e4d742 HBASE-28656 Optimize the verifyCopyResult logic in
ExportSnapshot (#5996)
dd694e4d742 is described below
commit dd694e4d74254965da8097ef06885ad94a3791e5
Author: Liangjun He <[email protected]>
AuthorDate: Sat Jun 22 01:02:20 2024 +0800
HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996)
Signed-off-by: Duo Zhang <[email protected]>
Signed-off-by: Wei-Chiu Chuang <[email protected]>
---
.../hadoop/hbase/snapshot/ExportSnapshot.java | 76 +++++++++++++++++++---
.../hadoop/hbase/snapshot/TestExportSnapshot.java | 26 +++++++-
.../hbase/snapshot/TestExportSnapshotAdjunct.java | 4 +-
.../snapshot/TestExportSnapshotV1NoCluster.java | 2 +-
4 files changed, 92 insertions(+), 16 deletions(-)
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index fd69960b78d..4e0c54b718b 100644
---
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -168,6 +168,15 @@ public class ExportSnapshot extends AbstractHBaseTool
implements Tool {
BYTES_COPIED
}
+ /**
+ * Indicates the checksum comparison result.
+ */
+ public enum ChecksumComparison {
+ TRUE, // checksum comparison is compatible and true.
+ FALSE, // checksum comparison is compatible and false.
+ INCOMPATIBLE, // checksum comparison is not compatible.
+ }
+
private static class ExportMapper
extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
private static final Logger LOG =
LoggerFactory.getLogger(ExportMapper.class);
@@ -533,6 +542,9 @@ public class ExportSnapshot extends AbstractHBaseTool
implements Tool {
}
}
+ /**
+ * Utility to compare the file length and checksums for the paths
specified.
+ */
private void verifyCopyResult(final FileStatus inputStat, final FileStatus
outputStat)
throws IOException {
long inputLen = inputStat.getLen();
@@ -547,20 +559,64 @@ public class ExportSnapshot extends AbstractHBaseTool
implements Tool {
// If length==0, we will skip checksum
if (inputLen != 0 && verifyChecksum) {
- FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
- if (inChecksum == null) {
- LOG.warn("Input file " + inputPath + " checksums are not available");
- }
- FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
- if (outChecksum == null) {
- LOG.warn("Output file " + outputPath + " checksums are not
available");
- }
- if (inChecksum != null && outChecksum != null &&
!inChecksum.equals(outChecksum)) {
- throw new IOException("Checksum mismatch between " + inputPath + "
and " + outputPath);
+ FileChecksum inChecksum = getFileChecksum(inputFs,
inputStat.getPath());
+ FileChecksum outChecksum = getFileChecksum(outputFs,
outputStat.getPath());
+
+ ChecksumComparison checksumComparison = verifyChecksum(inChecksum,
outChecksum);
+ if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
+ StringBuilder errMessage = new StringBuilder("Checksum mismatch
between ")
+ .append(inputPath).append(" and ").append(outputPath).append(".");
+
+ boolean addSkipHint = false;
+ String inputScheme = inputFs.getScheme();
+ String outputScheme = outputFs.getScheme();
+ if (!inputScheme.equals(outputScheme)) {
+ errMessage.append(" Input and output filesystems are of different
types.\n")
+ .append("Their checksum algorithms may be incompatible.");
+ addSkipHint = true;
+ } else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
+ errMessage.append(" Input and output differ in block-size.");
+ addSkipHint = true;
+ } else if (
+ inChecksum != null && outChecksum != null
+ &&
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+ ) {
+ errMessage.append(" Input and output checksum algorithms are of
different types.");
+ addSkipHint = true;
+ }
+ if (addSkipHint) {
+ errMessage
+ .append(" You can choose file-level checksum validation via "
+ + "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ + " or filesystems are different.")
+ .append(" Or you can skip checksum-checks altogether with
--no-checksum-verify.\n")
+ .append(" (NOTE: By skipping checksums, one runs the risk of "
+ + "masking data-corruption during file-transfer.)\n");
+ }
+ throw new IOException(errMessage.toString());
}
}
}
+ /**
+ * Utility to compare checksums
+ */
+ private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
+ final FileChecksum outChecksum) {
+ // If the input or output checksum is null, or the algorithms of input
and output are not
+ // equal, that means there is no comparison
+ // and return not compatible. else if matched, return compatible with
the matched result.
+ if (
+ inChecksum == null || outChecksum == null
+ ||
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+ ) {
+ return ChecksumComparison.INCOMPATIBLE;
+ } else if (inChecksum.equals(outChecksum)) {
+ return ChecksumComparison.TRUE;
+ }
+ return ChecksumComparison.FALSE;
+ }
+
/**
* Check if the two files are equal by looking at the file length, and at
the checksum (if user
* has specified the verifyChecksum flag).
diff --git
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 4dcadc755da..813da956799 100644
---
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -227,6 +227,23 @@ public class TestExportSnapshot {
removeExportDir(copyDir);
}
+ @Test
+ public void testExportWithChecksum() throws Exception {
+ // Test different schemes: input scheme is hdfs:// and output scheme is
file://
+ // The checksum verification will fail
+ Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
+ testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName,
snapshotName, snapshotName,
+ tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false,
false,
+ getBypassRegionPredicate(), false, true);
+
+ // Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
+ // The checksum verification will success
+ Path copyHdfsDir = getHdfsDestinationDir();
+ testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName,
snapshotName, snapshotName,
+ tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false,
false,
+ getBypassRegionPredicate(), true, true);
+ }
+
@Test
public void testExportWithTargetName() throws Exception {
final String targetName = "testExportWithTargetName";
@@ -281,7 +298,7 @@ public class TestExportSnapshot {
throws Exception {
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName,
snapshotName, targetName,
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite,
resetTtl,
- getBypassRegionPredicate(), true);
+ getBypassRegionPredicate(), true, false);
}
/**
@@ -290,8 +307,8 @@ public class TestExportSnapshot {
protected static void testExportFileSystemState(final Configuration conf,
final TableName tableName, final String snapshotName, final String
targetName,
final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean
overwrite,
- final boolean resetTtl, final RegionPredicate bypassregionPredicate,
boolean success)
- throws Exception {
+ final boolean resetTtl, final RegionPredicate bypassregionPredicate, final
boolean success,
+ final boolean checksumVerify) throws Exception {
FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
FileSystem srcFs = srcDir.getFileSystem(conf);
Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(),
tgtFs.getWorkingDirectory());
@@ -312,6 +329,9 @@ public class TestExportSnapshot {
if (resetTtl) {
opts.add("--reset-ttl");
}
+ if (!checksumVerify) {
+ opts.add("--no-checksum-verify");
+ }
// Export Snapshot
int res = run(conf, new ExportSnapshot(), opts.toArray(new
String[opts.size()]));
diff --git
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
index a2db1c68820..9453b9fcaf4 100644
---
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
+++
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
@@ -151,7 +151,7 @@ public class TestExportSnapshotAdjunct {
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName,
snapshotName, snapshotName,
- tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false,
null, true);
+ tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false,
null, true, false);
}
/**
@@ -167,6 +167,6 @@ public class TestExportSnapshotAdjunct {
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName,
snapshotName, snapshotName,
- tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false,
null, false);
+ tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false,
null, false, false);
}
}
diff --git
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
index 19496fcfe41..0215711070f 100644
---
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
+++
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
@@ -125,7 +125,7 @@ public class TestExportSnapshotV1NoCluster {
TableName tableName = builder.getTableDescriptor().getTableName();
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(),
tableName,
snapshotName, snapshotName, snapshotFilesCount, testDir,
- getDestinationDir(fs, testUtil, testDir), false, false, null, true);
+ getDestinationDir(fs, testUtil, testDir), false, false, null, true,
false);
}
static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtil hctu,
Path testDir)