d-c-manning commented on code in PR #7063: URL: https://github.com/apache/hbase/pull/7063#discussion_r2135793122
########## hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java: ########## @@ -218,6 +236,104 @@ public void testCopyExpiredSnapshotForScanner() throws IOException, InterruptedE .copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName)); } + /** + * Test scenario for HBASE-29346, which addresses the issue where restoring snapshots after region + * merge operations could lead to missing store file references, potentially resulting in data + * loss. + * <p> + * This test performs the following steps: + * </p> + * <ol> + * <li>Creates a table with multiple regions.</li> + * <li>Inserts data into each region and flushes to create store files.</li> + * <li>Takes snapshot of the table and performs restore.</li> + * <li>Disable compactions, merge regions, create a new snapshot, and restore that snapshot on the + * same restore path.</li> + * <li>Verifies data integrity by creating another snapshot.</li> + * </ol> + */ + @Test + public void testMultiSnapshotRestoreWithMerge() throws IOException, InterruptedException { + rootDir = TEST_UTIL.getDefaultRootDirPath(); + CommonFSUtils.setRootDir(conf, rootDir); + TableName tableName = TableName.valueOf("testMultiSnapshotRestoreWithMerge"); + Path restoreDir = new Path("/hbase/.tmp-snapshot/restore-snapshot-dest"); + + byte[] columnFamily = Bytes.toBytes("A"); + Table table = TEST_UTIL.createTable(tableName, new byte[][] { columnFamily }, + new byte[][] { new byte[] { 'b' }, new byte[] { 'd' } }); + Put put1 = new Put(Bytes.toBytes("a")); // Region 1: [-∞, b) + put1.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val1")); + table.put(put1); + Put put2 = new Put(Bytes.toBytes("b")); // Region 2: [b, d) + put2.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val2")); + table.put(put2); + Put put3 = new Put(Bytes.toBytes("d")); // Region 3: [d, +∞) + put3.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val3")); + table.put(put3); + + TEST_UTIL.getAdmin().flush(tableName); + + String snapshotOne = tableName.getNameAsString() + "-snapshot-one"; + createAndAssertSnapshot(tableName, snapshotOne); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotOne); + flipComactions(false); + mergeRegions(tableName, 2); + String snapshotTwo = tableName.getNameAsString() + "-snapshot-two"; + createAndAssertSnapshot(tableName, snapshotTwo); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotTwo); + flipComactions(true); + String snapshotThree = tableName.getNameAsString() + "-snapshot-three"; + createAndAssertSnapshot(tableName, snapshotThree); + } + + private void createAndAssertSnapshot(TableName tableName, String snapshotName) + throws SnapshotCreationException, IllegalArgumentException, IOException { + org.apache.hadoop.hbase.client.SnapshotDescription snapshotDescOne = + new org.apache.hadoop.hbase.client.SnapshotDescription(snapshotName, tableName, + SnapshotType.FLUSH, null, EnvironmentEdgeManager.currentTime(), -1); + TEST_UTIL.getAdmin().snapshot(snapshotDescOne); + boolean isExist = TEST_UTIL.getAdmin().listSnapshots().stream() + .anyMatch(ele -> snapshotName.equals(ele.getName())); + assertTrue(isExist); + + } + + private void flipComactions(boolean isEnable) { Review Comment: nit ```suggestion private void flipCompactions(boolean isEnable) { ``` ########## hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java: ########## @@ -218,6 +230,104 @@ public void testCopyExpiredSnapshotForScanner() throws IOException, InterruptedE .copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName)); } + /** + * Test scenario for HBASE-29346, which addresses the issue where restoring snapshots after region + * merge operations could lead to missing store file references, potentially resulting in data + * loss. + * <p> + * This test performs the following steps: + * </p> + * <ol> + * <li>Creates a table with multiple regions.</li> + * <li>Inserts data into each region and flushes to create store files.</li> + * <li>Takes snapshot of the table and performs restore.</li> + * <li>Disable compactions, merge regions, create a new snapshot, and restore that snapshot on the + * same restore path.</li> + * <li>Verifies data integrity by creating another snapshot.</li> + * </ol> + */ + @Test + public void testMultiSnapshotRestoreWithMerge() throws IOException, InterruptedException { + rootDir = TEST_UTIL.getDefaultRootDirPath(); + CommonFSUtils.setRootDir(conf, rootDir); + TableName tableName = TableName.valueOf("testMultiSnapshotRestoreWithMerge"); + Path restoreDir = new Path("/hbase/.tmp-snapshot/restore-snapshot-dest"); + + byte[] columnFamily = Bytes.toBytes("A"); + Table table = TEST_UTIL.createTable(tableName, new byte[][] { columnFamily }, + new byte[][] { new byte[] { 'b' }, new byte[] { 'd' } }); + Put put1 = new Put(Bytes.toBytes("a")); // Region 1: [-∞, b) + put1.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val1")); + table.put(put1); + Put put2 = new Put(Bytes.toBytes("b")); // Region 2: [b, d) + put2.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val2")); + table.put(put2); + Put put3 = new Put(Bytes.toBytes("d")); // Region 3: [d, +∞) + put3.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val3")); + table.put(put3); + + TEST_UTIL.getAdmin().flush(tableName); + + String snapshotOne = tableName.getNameAsString() + "-snapshot-one"; + createAndAssertSnapshot(tableName, snapshotOne); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotOne); + flipComactions(false); + mergeRegions(tableName, 2); + String snapshotTwo = tableName.getNameAsString() + "-snapshot-two"; + createAndAssertSnapshot(tableName, snapshotTwo); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotTwo); + flipComactions(true); + String snapshotThree = tableName.getNameAsString() + "-snapshot-three"; + createAndAssertSnapshot(tableName, snapshotThree); + } + + private void createAndAssertSnapshot(TableName tableName, String snapshotName) + throws SnapshotCreationException, IllegalArgumentException, IOException { + org.apache.hadoop.hbase.client.SnapshotDescription snapshotDescOne = + new org.apache.hadoop.hbase.client.SnapshotDescription(snapshotName, tableName, + SnapshotType.FLUSH, null, EnvironmentEdgeManager.currentTime(), -1); + TEST_UTIL.getAdmin().snapshot(snapshotDescOne); + boolean isExist = TEST_UTIL.getAdmin().listSnapshots().stream() + .anyMatch(ele -> snapshotName.equals(ele.getName())); + assertTrue(isExist); + + } + + private void flipComactions(boolean isEnable) { + int numLiveRegionServers = TEST_UTIL.getHBaseCluster().getNumLiveRegionServers(); + for (int serverNumber = 0; serverNumber < numLiveRegionServers; serverNumber++) { + HRegionServer regionServer = TEST_UTIL.getHBaseCluster().getRegionServer(serverNumber); + regionServer.getCompactSplitThread().setCompactionsEnabled(isEnable); + } + + } + + private void resetProcExecutorTestingKillFlag() { + final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); + assertTrue("expected executor to be running", procExec.isRunning()); + } Review Comment: unused now? ```suggestion ``` ########## hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java: ########## @@ -218,6 +230,104 @@ public void testCopyExpiredSnapshotForScanner() throws IOException, InterruptedE .copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName)); } + /** + * Test scenario for HBASE-29346, which addresses the issue where restoring snapshots after region + * merge operations could lead to missing store file references, potentially resulting in data + * loss. + * <p> + * This test performs the following steps: + * </p> + * <ol> + * <li>Creates a table with multiple regions.</li> + * <li>Inserts data into each region and flushes to create store files.</li> + * <li>Takes snapshot of the table and performs restore.</li> + * <li>Disable compactions, merge regions, create a new snapshot, and restore that snapshot on the + * same restore path.</li> + * <li>Verifies data integrity by creating another snapshot.</li> + * </ol> + */ + @Test + public void testMultiSnapshotRestoreWithMerge() throws IOException, InterruptedException { + rootDir = TEST_UTIL.getDefaultRootDirPath(); + CommonFSUtils.setRootDir(conf, rootDir); + TableName tableName = TableName.valueOf("testMultiSnapshotRestoreWithMerge"); + Path restoreDir = new Path("/hbase/.tmp-snapshot/restore-snapshot-dest"); + + byte[] columnFamily = Bytes.toBytes("A"); + Table table = TEST_UTIL.createTable(tableName, new byte[][] { columnFamily }, + new byte[][] { new byte[] { 'b' }, new byte[] { 'd' } }); + Put put1 = new Put(Bytes.toBytes("a")); // Region 1: [-∞, b) + put1.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val1")); + table.put(put1); + Put put2 = new Put(Bytes.toBytes("b")); // Region 2: [b, d) + put2.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val2")); + table.put(put2); + Put put3 = new Put(Bytes.toBytes("d")); // Region 3: [d, +∞) + put3.addColumn(columnFamily, Bytes.toBytes("q"), Bytes.toBytes("val3")); + table.put(put3); + + TEST_UTIL.getAdmin().flush(tableName); + + String snapshotOne = tableName.getNameAsString() + "-snapshot-one"; + createAndAssertSnapshot(tableName, snapshotOne); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotOne); + flipComactions(false); + mergeRegions(tableName, 2); + String snapshotTwo = tableName.getNameAsString() + "-snapshot-two"; + createAndAssertSnapshot(tableName, snapshotTwo); + RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotTwo); + flipComactions(true); + String snapshotThree = tableName.getNameAsString() + "-snapshot-three"; + createAndAssertSnapshot(tableName, snapshotThree); Review Comment: Without the fix, when I tried to run locally, I encountered a timeout error. Did you see a data integrity error about the reference file specifically? ``` [ERROR] Errors: [ERROR] org.apache.hadoop.hbase.snapshot.TestRestoreSnapshotHelper.null [ERROR] Run 1: TestRestoreSnapshotHelper.testMultiSnapshotRestoreWithMerge:288->createAndAssertSnapshot:296 » TestTimedOut test timed out after 780 seconds [ERROR] Run 2: TestRestoreSnapshotHelper » Appears to be stuck in thread MiniHBaseClusterRegionServer-EventLoopGroup-3-1 [INFO] [ERROR] TestRestoreSnapshotHelper.testMultiSnapshotRestoreWithMerge:288->createAndAssertSnapshot:296 » InterruptedIO [INFO] [ERROR] Tests run: 2, Failures: 0, Errors: 2, Skipped: 0 ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org