This is an automated email from the ASF dual-hosted git repository. andor pushed a commit to branch HBASE-28957 in repository https://gitbox.apache.org/repos/asf/hbase.git
commit aa4320e8355ffed8ed22fe74836a4eff2be4e9bd Author: vinayak hegde <vinayakph...@gmail.com> AuthorDate: Mon Jun 23 22:34:58 2025 +0530 HBASE-29350: Ensure Cleanup of Continuous Backup WALs After Last Backup is Force Deleted (#7090) Signed-off-by: Tak Lon (Stephen) Wu <tak...@apache.org> Reviewed by: Kevin Geiszler <kevin.j.geisz...@gmail.com> --- .../hadoop/hbase/backup/impl/BackupCommands.java | 73 +++++++++- .../hbase/backup/impl/BackupSystemTable.java | 2 +- .../hbase/backup/impl/FullTableBackupClient.java | 3 + .../apache/hadoop/hbase/backup/TestBackupBase.java | 11 +- .../hbase/backup/TestBackupDeleteWithCleanup.java | 150 +++++++++++++++++++-- 5 files changed, 218 insertions(+), 21 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java index 11b6890ed03..2020b84bc1c 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java @@ -84,8 +84,10 @@ import org.apache.hadoop.hbase.backup.HBackupFileSystem; import org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager; import org.apache.hadoop.hbase.backup.util.BackupSet; import org.apache.hadoop.hbase.backup.util.BackupUtils; +import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; @@ -892,7 +894,8 @@ public final class BackupCommands { /** * Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR after a successful - * backup deletion. + * backup deletion. If no full backups are present, all WALs are deleted, tables are removed + * from continuous backup metadata, and the associated replication peer is disabled. */ private void cleanUpUnusedBackupWALs() throws IOException { Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create(); @@ -903,7 +906,8 @@ public final class BackupCommands { return; } - try (BackupSystemTable sysTable = new BackupSystemTable(conn)) { + try (Admin admin = conn.getAdmin(); + BackupSystemTable sysTable = new BackupSystemTable(conn)) { // Get list of tables under continuous backup Map<TableName, Long> continuousBackupTables = sysTable.getContinuousBackupTableSet(); if (continuousBackupTables.isEmpty()) { @@ -914,7 +918,15 @@ public final class BackupCommands { // Find the earliest timestamp after which WALs are still needed long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable); if (cutoffTimestamp == 0) { - System.err.println("ERROR: No valid full backup found. Skipping WAL cleanup."); + // No full backup exists. PITR cannot function without a base full backup. + // Clean up all WALs, remove tables from backup metadata, and disable the replication + // peer. + System.out + .println("No full backups found. Cleaning up all WALs and disabling replication peer."); + + disableContinuousBackupReplicationPeer(admin); + removeAllTablesFromContinuousBackup(sysTable); + deleteAllBackupWALFiles(conf, backupWalDir); return; } @@ -944,6 +956,16 @@ public final class BackupCommands { return 0; } + private void disableContinuousBackupReplicationPeer(Admin admin) throws IOException { + for (ReplicationPeerDescription peer : admin.listReplicationPeers()) { + if (peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && peer.isEnabled()) { + admin.disableReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER); + System.out.println("Disabled replication peer: " + CONTINUOUS_BACKUP_REPLICATION_PEER); + break; + } + } + } + /** * Updates the start time for continuous backups if older than cutoff timestamp. * @param sysTable Backup system table @@ -966,6 +988,49 @@ public final class BackupCommands { } } + private void removeAllTablesFromContinuousBackup(BackupSystemTable sysTable) + throws IOException { + Map<TableName, Long> allTables = sysTable.getContinuousBackupTableSet(); + if (!allTables.isEmpty()) { + sysTable.removeContinuousBackupTableSet(allTables.keySet()); + System.out.println("Removed all tables from continuous backup metadata."); + } + } + + private void deleteAllBackupWALFiles(Configuration conf, String backupWalDir) + throws IOException { + try { + BackupFileSystemManager manager = + new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir); + FileSystem fs = manager.getBackupFs(); + Path walDir = manager.getWalsDir(); + Path bulkloadDir = manager.getBulkLoadFilesDir(); + + // Delete contents under WAL directory + if (fs.exists(walDir)) { + FileStatus[] walContents = fs.listStatus(walDir); + for (FileStatus item : walContents) { + fs.delete(item.getPath(), true); // recursive delete of each child + } + System.out.println("Deleted all contents under WAL directory: " + walDir); + } + + // Delete contents under bulk load directory + if (fs.exists(bulkloadDir)) { + FileStatus[] bulkContents = fs.listStatus(bulkloadDir); + for (FileStatus item : bulkContents) { + fs.delete(item.getPath(), true); // recursive delete of each child + } + System.out.println("Deleted all contents under Bulk Load directory: " + bulkloadDir); + } + + } catch (IOException e) { + System.out.println("WARNING: Failed to delete contents under backup directories: " + + backupWalDir + ". Error: " + e.getMessage()); + throw e; + } + } + /** * Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp. */ @@ -1010,7 +1075,7 @@ public final class BackupCommands { System.out.println("WARNING: Failed to parse directory name '" + dirName + "'. Skipping. Error: " + e.getMessage()); } catch (IOException e) { - System.out.println("WARNING: Failed to delete directory '" + dirPath + System.err.println("WARNING: Failed to delete directory '" + dirPath + "'. Skipping. Error: " + e.getMessage()); } } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java index d98f99e10cb..7a7d948e934 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java @@ -1510,7 +1510,7 @@ public final class BackupSystemTable implements Closeable { private Delete createDeleteForContinuousBackupTableSet(Set<TableName> tables) { Delete delete = new Delete(rowkey(CONTINUOUS_BACKUP_SET)); for (TableName tableName : tables) { - delete.addColumns(META_FAMILY, Bytes.toBytes(tableName.getNameAsString())); + delete.addColumn(META_FAMILY, Bytes.toBytes(tableName.getNameAsString())); } return delete; } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java index 3735817f487..3f6ae3deb63 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java @@ -305,6 +305,9 @@ public class FullTableBackupClient extends TableBackupClient { .collect(Collectors.toMap(tableName -> tableName, tableName -> new ArrayList<>())); try { + if (!admin.isReplicationPeerEnabled(CONTINUOUS_BACKUP_REPLICATION_PEER)) { + admin.enableReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER); + } admin.appendReplicationPeerTableCFs(CONTINUOUS_BACKUP_REPLICATION_PEER, tableMap); LOG.info("Updated replication peer {} with table and column family map.", CONTINUOUS_BACKUP_REPLICATION_PEER); diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java index 684b701daa7..131b6377345 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java @@ -415,13 +415,12 @@ public class TestBackupBase { return request; } - protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables, String path, - boolean noChecksumVerify, boolean continuousBackupEnabled) { + protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables, + String rootDir, boolean noChecksumVerify, boolean isContinuousBackupEnabled) { BackupRequest.Builder builder = new BackupRequest.Builder(); - BackupRequest request = builder.withBackupType(type).withTableList(tables) - .withTargetRootDir(path).withNoChecksumVerify(noChecksumVerify) - .withContinuousBackupEnabled(continuousBackupEnabled).build(); - return request; + return builder.withBackupType(type).withTableList(tables).withTargetRootDir(rootDir) + .withNoChecksumVerify(noChecksumVerify).withContinuousBackupEnabled(isContinuousBackupEnabled) + .build(); } protected String backupTables(BackupType type, List<TableName> tables, String path) diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java index 6d76ac4e89b..07c9110072b 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.backup; import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER; import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR; import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR; import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT; @@ -28,18 +29,25 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; +import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl; import org.apache.hadoop.hbase.backup.impl.BackupSystemTable; +import org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.util.ToolRunner; +import org.junit.After; +import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -55,38 +63,55 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase { String backupWalDirName = "TestBackupDeleteWithCleanup"; - @Test - public void testBackupDeleteWithCleanupLogic() throws Exception { + private FileSystem fs; + private Path backupWalDir; + private BackupSystemTable backupSystemTable; + + @Before + public void setUpTest() throws Exception { Path root = TEST_UTIL.getDataTestDirOnTestFS(); - Path backupWalDir = new Path(root, backupWalDirName); + backupWalDir = new Path(root, backupWalDirName); conf1.set(CONF_CONTINUOUS_BACKUP_WAL_DIR, backupWalDir.toString()); - FileSystem fs = FileSystem.get(conf1); + fs = FileSystem.get(conf1); fs.mkdirs(backupWalDir); + backupSystemTable = new BackupSystemTable(TEST_UTIL.getConnection()); + } + + @After + public void tearDownTest() throws Exception { + if (backupSystemTable != null) { + backupSystemTable.close(); + } + if (fs != null && backupWalDir != null) { + fs.delete(backupWalDir, true); + } + + EnvironmentEdgeManager.reset(); + } + @Test + public void testBackupDeleteWithCleanupLogic() throws Exception { // Step 1: Setup Backup Folders long currentTime = EnvironmentEdgeManager.getDelegate().currentTime(); - setupBackupFolders(fs, backupWalDir, currentTime); + setupBackupFolders(currentTime); // Log the directory structure before cleanup logDirectoryStructure(fs, backupWalDir, "Directory structure BEFORE cleanup:"); // Step 2: Simulate Backup Creation - BackupSystemTable backupSystemTable = new BackupSystemTable(TEST_UTIL.getConnection()); backupSystemTable.addContinuousBackupTableSet(Set.of(table1), currentTime - (2 * ONE_DAY_IN_MILLISECONDS)); EnvironmentEdgeManager .injectEdge(() -> System.currentTimeMillis() - (2 * ONE_DAY_IN_MILLISECONDS)); + String backupId = fullTableBackup(Lists.newArrayList(table1)); assertTrue(checkSucceeded(backupId)); - String anotherBackupId = fullTableBackup(Lists.newArrayList(table1)); assertTrue(checkSucceeded(anotherBackupId)); // Step 3: Run Delete Command - int ret = - ToolRunner.run(conf1, new BackupDriver(), new String[] { "delete", "-l", backupId, "-fd" }); - assertEquals(0, ret); + deleteBackup(backupId); // Log the directory structure after cleanup logDirectoryStructure(fs, backupWalDir, "Directory structure AFTER cleanup:"); @@ -96,6 +121,70 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase { // Step 5: Verify System Table Update verifySystemTableUpdate(backupSystemTable, currentTime); + + // Cleanup + deleteBackup(anotherBackupId); + } + + @Test + public void testSingleBackupForceDelete() throws Exception { + // Step 1: Setup Backup Folders + long currentTime = EnvironmentEdgeManager.getDelegate().currentTime(); + setupBackupFolders(currentTime); + + // Log the directory structure before cleanup + logDirectoryStructure(fs, backupWalDir, "Directory structure BEFORE cleanup:"); + + // Step 2: Simulate Backup Creation + backupSystemTable.addContinuousBackupTableSet(Set.of(table1), + currentTime - (2 * ONE_DAY_IN_MILLISECONDS)); + + EnvironmentEdgeManager + .injectEdge(() -> System.currentTimeMillis() - (2 * ONE_DAY_IN_MILLISECONDS)); + + String backupId = fullTableBackupWithContinuous(Lists.newArrayList(table1)); + assertTrue(checkSucceeded(backupId)); + + assertTrue("Backup replication peer should be enabled after the backup", + continuousBackupReplicationPeerExistsAndEnabled()); + + // Step 3: Run Delete Command + deleteBackup(backupId); + + // Log the directory structure after cleanup + logDirectoryStructure(fs, backupWalDir, "Directory structure AFTER cleanup:"); + + // Step 4: Verify CONTINUOUS_BACKUP_REPLICATION_PEER is disabled + assertFalse("Backup replication peer should be disabled or removed", + continuousBackupReplicationPeerExistsAndEnabled()); + + // Step 5: Verify that system table is updated to remove all the tables + Set<TableName> remainingTables = backupSystemTable.getContinuousBackupTableSet().keySet(); + assertTrue("System table should have no tables after all full backups are clear", + remainingTables.isEmpty()); + + // Step 6: Verify that the backup WAL directory is empty + assertTrue("WAL backup directory should be empty after force delete", + areWalAndBulkloadDirsEmpty(conf1, backupWalDir.toString())); + + // Step 7: Take new full backup with continuous backup enabled + String backupIdContinuous = fullTableBackupWithContinuous(Lists.newArrayList(table1)); + + // Step 8: Verify CONTINUOUS_BACKUP_REPLICATION_PEER is enabled again + assertTrue("Backup replication peer should be re-enabled after new backup", + continuousBackupReplicationPeerExistsAndEnabled()); + + // And system table has new entry + Set<TableName> newTables = backupSystemTable.getContinuousBackupTableSet().keySet(); + assertTrue("System table should contain the table after new backup", + newTables.contains(table1)); + + // Cleanup + deleteBackup(backupIdContinuous); + } + + private void setupBackupFolders(long currentTime) throws IOException { + setupBackupFolders(fs, backupWalDir, currentTime); } public static void setupBackupFolders(FileSystem fs, Path backupWalDir, long currentTime) @@ -181,4 +270,45 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase { } } } + + private boolean continuousBackupReplicationPeerExistsAndEnabled() throws IOException { + return TEST_UTIL.getAdmin().listReplicationPeers().stream().anyMatch( + peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && peer.isEnabled()); + } + + private static boolean areWalAndBulkloadDirsEmpty(Configuration conf, String backupWalDir) + throws IOException { + BackupFileSystemManager manager = + new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir); + + FileSystem fs = manager.getBackupFs(); + Path walDir = manager.getWalsDir(); + Path bulkloadDir = manager.getBulkLoadFilesDir(); + + return isDirectoryEmpty(fs, walDir) && isDirectoryEmpty(fs, bulkloadDir); + } + + private static boolean isDirectoryEmpty(FileSystem fs, Path dirPath) throws IOException { + if (!fs.exists(dirPath)) { + // Directory doesn't exist — treat as empty + return true; + } + FileStatus[] entries = fs.listStatus(dirPath); + return entries == null || entries.length == 0; + } + + private static void deleteBackup(String backupId) throws Exception { + int ret = + ToolRunner.run(conf1, new BackupDriver(), new String[] { "delete", "-l", backupId, "-fd" }); + assertEquals(0, ret); + } + + private String fullTableBackupWithContinuous(List<TableName> tables) throws IOException { + try (BackupAdmin admin = new BackupAdminImpl(TEST_UTIL.getConnection())) { + BackupRequest request = + createBackupRequest(BackupType.FULL, new ArrayList<>(tables), BACKUP_ROOT_DIR, false, true); + return admin.backupTables(request); + } + } + }