vinayakphegde commented on code in PR #7007:
URL: https://github.com/apache/hbase/pull/7007#discussion_r2125880412
##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java:
##########
@@ -876,6 +890,140 @@ private boolean canAnyOtherBackupCover(List<BackupInfo>
allBackups, BackupInfo c
return false;
}
+ /**
+ * Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR
after a successful
+ * backup deletion.
+ */
+ private void cleanUpUnusedBackupWALs() throws IOException {
+ Configuration conf = getConf() != null ? getConf() :
HBaseConfiguration.create();
+ String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+
+ if (Strings.isNullOrEmpty(backupWalDir)) {
+ System.out.println("No WAL directory specified for continuous backup.
Skipping cleanup.");
+ return;
+ }
+
+ try (Connection conn = ConnectionFactory.createConnection(conf);
+ BackupSystemTable sysTable = new BackupSystemTable(conn)) {
+ // Get list of tables under continuous backup
+ Map<TableName, Long> continuousBackupTables =
sysTable.getContinuousBackupTableSet();
+ if (continuousBackupTables.isEmpty()) {
+ System.out.println("No continuous backups configured. Skipping WAL
cleanup.");
+ return;
+ }
+
+ // Find the earliest timestamp after which WALs are still needed
+ long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable);
+ if (cutoffTimestamp == 0) {
+ System.err.println("ERROR: No valid full backup found. Skipping WAL
cleanup.");
+ return;
+ }
+
+ // Update metadata before actual cleanup to avoid inconsistencies
+ updateBackupTableStartTimes(sysTable, cutoffTimestamp);
+
+ // Delete WAL files older than cutoff timestamp
+ deleteOldWALFiles(conf, backupWalDir, cutoffTimestamp);
+
+ }
+ }
+
+ /**
+ * Determines the cutoff time for cleaning WAL files.
+ * @param sysTable Backup system table
+ * @return cutoff timestamp or 0 if not found
+ */
+ private long determineWALCleanupCutoffTime(BackupSystemTable sysTable)
throws IOException {
+ List<BackupInfo> backupInfos =
sysTable.getBackupInfos(BackupState.COMPLETE);
+ Collections.reverse(backupInfos); // Start from oldest
+
+ for (BackupInfo backupInfo : backupInfos) {
+ if (BackupType.FULL.equals(backupInfo.getType())) {
+ return backupInfo.getStartTs();
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Updates the start time for continuous backups if older than cutoff
timestamp.
+ * @param sysTable Backup system table
+ * @param cutoffTimestamp Timestamp before which WALs are no longer needed
+ */
+ private void updateBackupTableStartTimes(BackupSystemTable sysTable, long
cutoffTimestamp)
+ throws IOException {
+
+ Map<TableName, Long> backupTables =
sysTable.getContinuousBackupTableSet();
+ Set<TableName> tablesToUpdate = new HashSet<>();
+
+ for (Map.Entry<TableName, Long> entry : backupTables.entrySet()) {
+ if (entry.getValue() < cutoffTimestamp) {
+ tablesToUpdate.add(entry.getKey());
+ }
+ }
+
+ if (!tablesToUpdate.isEmpty()) {
+ sysTable.updateContinuousBackupTableSet(tablesToUpdate,
cutoffTimestamp);
+ }
+ }
+
+ /**
+ * Cleans up old WAL and bulk-loaded files based on the determined cutoff
timestamp.
+ */
+ private void deleteOldWALFiles(Configuration conf, String backupWalDir,
long cutoffTime)
+ throws IOException {
+ System.out.println("Starting WAL cleanup in backup directory: " +
backupWalDir
+ + " with cutoff time: " + cutoffTime);
+
+ BackupFileSystemManager manager =
+ new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf,
backupWalDir);
+ FileSystem fs = manager.getBackupFs();
+ Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
+
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ System.out.println("Listing directories under: " + walDir);
+
+ FileStatus[] directories = fs.listStatus(walDir);
+
+ for (FileStatus dirStatus : directories) {
+ if (!dirStatus.isDirectory()) {
+ continue; // Skip files, we only want directories
+ }
+
+ Path dirPath = dirStatus.getPath();
+ String dirName = dirPath.getName();
+
+ try {
+ long dayStart = parseDayDirectory(dirName, dateFormat);
+ System.out
+ .println("Checking WAL directory: " + dirName + " (Start Time: " +
dayStart + ")");
+
+ // If WAL files of that day are older than cutoff time, delete them
+ if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
+ System.out.println("Deleting outdated WAL directory: " + dirPath);
+ fs.delete(dirPath, true);
Review Comment:
Sure, let me create a Jira issue for that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]