This is an automated email from the ASF dual-hosted git repository.

weichiu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new dd694e4d742 HBASE-28656 Optimize the verifyCopyResult logic in 
ExportSnapshot (#5996)
dd694e4d742 is described below

commit dd694e4d74254965da8097ef06885ad94a3791e5
Author: Liangjun He <[email protected]>
AuthorDate: Sat Jun 22 01:02:20 2024 +0800

    HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996)
    
    Signed-off-by: Duo Zhang <[email protected]>
    Signed-off-by: Wei-Chiu Chuang <[email protected]>
---
 .../hadoop/hbase/snapshot/ExportSnapshot.java      | 76 +++++++++++++++++++---
 .../hadoop/hbase/snapshot/TestExportSnapshot.java  | 26 +++++++-
 .../hbase/snapshot/TestExportSnapshotAdjunct.java  |  4 +-
 .../snapshot/TestExportSnapshotV1NoCluster.java    |  2 +-
 4 files changed, 92 insertions(+), 16 deletions(-)

diff --git 
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
 
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index fd69960b78d..4e0c54b718b 100644
--- 
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ 
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -168,6 +168,15 @@ public class ExportSnapshot extends AbstractHBaseTool 
implements Tool {
     BYTES_COPIED
   }
 
+  /**
+   * Indicates the checksum comparison result.
+   */
+  public enum ChecksumComparison {
+    TRUE, // checksum comparison is compatible and true.
+    FALSE, // checksum comparison is compatible and false.
+    INCOMPATIBLE, // checksum comparison is not compatible.
+  }
+
   private static class ExportMapper
     extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
     private static final Logger LOG = 
LoggerFactory.getLogger(ExportMapper.class);
@@ -533,6 +542,9 @@ public class ExportSnapshot extends AbstractHBaseTool 
implements Tool {
       }
     }
 
+    /**
+     * Utility to compare the file length and checksums for the paths 
specified.
+     */
     private void verifyCopyResult(final FileStatus inputStat, final FileStatus 
outputStat)
       throws IOException {
       long inputLen = inputStat.getLen();
@@ -547,20 +559,64 @@ public class ExportSnapshot extends AbstractHBaseTool 
implements Tool {
 
       // If length==0, we will skip checksum
       if (inputLen != 0 && verifyChecksum) {
-        FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
-        if (inChecksum == null) {
-          LOG.warn("Input file " + inputPath + " checksums are not available");
-        }
-        FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
-        if (outChecksum == null) {
-          LOG.warn("Output file " + outputPath + " checksums are not 
available");
-        }
-        if (inChecksum != null && outChecksum != null && 
!inChecksum.equals(outChecksum)) {
-          throw new IOException("Checksum mismatch between " + inputPath + " 
and " + outputPath);
+        FileChecksum inChecksum = getFileChecksum(inputFs, 
inputStat.getPath());
+        FileChecksum outChecksum = getFileChecksum(outputFs, 
outputStat.getPath());
+
+        ChecksumComparison checksumComparison = verifyChecksum(inChecksum, 
outChecksum);
+        if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
+          StringBuilder errMessage = new StringBuilder("Checksum mismatch 
between ")
+            .append(inputPath).append(" and ").append(outputPath).append(".");
+
+          boolean addSkipHint = false;
+          String inputScheme = inputFs.getScheme();
+          String outputScheme = outputFs.getScheme();
+          if (!inputScheme.equals(outputScheme)) {
+            errMessage.append(" Input and output filesystems are of different 
types.\n")
+              .append("Their checksum algorithms may be incompatible.");
+            addSkipHint = true;
+          } else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
+            errMessage.append(" Input and output differ in block-size.");
+            addSkipHint = true;
+          } else if (
+            inChecksum != null && outChecksum != null
+              && 
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+          ) {
+            errMessage.append(" Input and output checksum algorithms are of 
different types.");
+            addSkipHint = true;
+          }
+          if (addSkipHint) {
+            errMessage
+              .append(" You can choose file-level checksum validation via "
+                + "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+                + " or filesystems are different.")
+              .append(" Or you can skip checksum-checks altogether with 
--no-checksum-verify.\n")
+              .append(" (NOTE: By skipping checksums, one runs the risk of "
+                + "masking data-corruption during file-transfer.)\n");
+          }
+          throw new IOException(errMessage.toString());
         }
       }
     }
 
+    /**
+     * Utility to compare checksums
+     */
+    private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
+      final FileChecksum outChecksum) {
+      // If either checksum is null, or the input and output checksum algorithms
+      // differ, the two checksums cannot be compared: return INCOMPATIBLE.
+      // Otherwise return TRUE if the checksums match and FALSE if they do not.
+      if (
+        inChecksum == null || outChecksum == null
+          || 
!inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
+      ) {
+        return ChecksumComparison.INCOMPATIBLE;
+      } else if (inChecksum.equals(outChecksum)) {
+        return ChecksumComparison.TRUE;
+      }
+      return ChecksumComparison.FALSE;
+    }
+
     /**
      * Check if the two files are equal by looking at the file length, and at 
the checksum (if user
      * has specified the verifyChecksum flag).
diff --git 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 4dcadc755da..813da956799 100644
--- 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -227,6 +227,23 @@ public class TestExportSnapshot {
     removeExportDir(copyDir);
   }
 
+  @Test
+  public void testExportWithChecksum() throws Exception {
+    // Test different schemes: input scheme is hdfs:// and output scheme is 
file://
+    // The checksum verification will fail
+    Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
+    testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, 
snapshotName, snapshotName,
+      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false, 
false,
+      getBypassRegionPredicate(), false, true);
+
+    // Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
+    // The checksum verification will succeed
+    Path copyHdfsDir = getHdfsDestinationDir();
+    testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, 
snapshotName, snapshotName,
+      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false, 
false,
+      getBypassRegionPredicate(), true, true);
+  }
+
   @Test
   public void testExportWithTargetName() throws Exception {
     final String targetName = "testExportWithTargetName";
@@ -281,7 +298,7 @@ public class TestExportSnapshot {
     throws Exception {
     testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, 
snapshotName, targetName,
       filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite, 
resetTtl,
-      getBypassRegionPredicate(), true);
+      getBypassRegionPredicate(), true, false);
   }
 
   /**
@@ -290,8 +307,8 @@ public class TestExportSnapshot {
   protected static void testExportFileSystemState(final Configuration conf,
     final TableName tableName, final String snapshotName, final String 
targetName,
     final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean 
overwrite,
-    final boolean resetTtl, final RegionPredicate bypassregionPredicate, 
boolean success)
-    throws Exception {
+    final boolean resetTtl, final RegionPredicate bypassregionPredicate, final 
boolean success,
+    final boolean checksumVerify) throws Exception {
     FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
     FileSystem srcFs = srcDir.getFileSystem(conf);
     Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(), 
tgtFs.getWorkingDirectory());
@@ -312,6 +329,9 @@ public class TestExportSnapshot {
     if (resetTtl) {
       opts.add("--reset-ttl");
     }
+    if (!checksumVerify) {
+      opts.add("--no-checksum-verify");
+    }
 
     // Export Snapshot
     int res = run(conf, new ExportSnapshot(), opts.toArray(new 
String[opts.size()]));
diff --git 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
index a2db1c68820..9453b9fcaf4 100644
--- 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
+++ 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java
@@ -151,7 +151,7 @@ public class TestExportSnapshotAdjunct {
     conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
     conf.setInt("mapreduce.map.maxattempts", 3);
     TestExportSnapshot.testExportFileSystemState(conf, tableName, 
snapshotName, snapshotName,
-      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, 
null, true);
+      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, 
null, true, false);
   }
 
   /**
@@ -167,6 +167,6 @@ public class TestExportSnapshotAdjunct {
     conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
     conf.setInt("mapreduce.map.maxattempts", 3);
     TestExportSnapshot.testExportFileSystemState(conf, tableName, 
snapshotName, snapshotName,
-      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, 
null, false);
+      tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, 
null, false, false);
   }
 }
diff --git 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
index 19496fcfe41..0215711070f 100644
--- 
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
+++ 
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java
@@ -125,7 +125,7 @@ public class TestExportSnapshotV1NoCluster {
     TableName tableName = builder.getTableDescriptor().getTableName();
     TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), 
tableName,
       snapshotName, snapshotName, snapshotFilesCount, testDir,
-      getDestinationDir(fs, testUtil, testDir), false, false, null, true);
+      getDestinationDir(fs, testUtil, testDir), false, false, null, true, 
false);
   }
 
   static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtil hctu, 
Path testDir)

Reply via email to