kgeisz commented on code in PR #7007:
URL: https://github.com/apache/hbase/pull/7007#discussion_r2127536048
##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java:
##########
@@ -876,6 +890,139 @@ private boolean canAnyOtherBackupCover(List<BackupInfo>
allBackups, BackupInfo c
return false;
}
+ /**
+ * Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR
after a successful
+ * backup deletion.
+ */
+ private void cleanUpUnusedBackupWALs() throws IOException {
+ Configuration conf = getConf() != null ? getConf() :
HBaseConfiguration.create();
+ String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+
+ if (Strings.isNullOrEmpty(backupWalDir)) {
+ System.out.println("No WAL directory specified for continuous backup.
Skipping cleanup.");
+ return;
+ }
+
+ try (BackupSystemTable sysTable = new BackupSystemTable(conn)) {
+ // Get list of tables under continuous backup
+ Map<TableName, Long> continuousBackupTables =
sysTable.getContinuousBackupTableSet();
+ if (continuousBackupTables.isEmpty()) {
+ System.out.println("No continuous backups configured. Skipping WAL
cleanup.");
+ return;
+ }
+
+ // Find the earliest timestamp after which WALs are still needed
+ long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable);
+ if (cutoffTimestamp == 0) {
+ System.err.println("ERROR: No valid full backup found. Skipping WAL
cleanup.");
+ return;
+ }
+
+ // Update metadata before actual cleanup to avoid inconsistencies
+ updateBackupTableStartTimes(sysTable, cutoffTimestamp);
+
+ // Delete WAL files older than cutoff timestamp
+ deleteOldWALFiles(conf, backupWalDir, cutoffTimestamp);
+
+ }
+ }
+
+ /**
+ * Determines the cutoff time for cleaning WAL files.
+ * @param sysTable Backup system table
+ * @return cutoff timestamp or 0 if not found
+ */
+ private long determineWALCleanupCutoffTime(BackupSystemTable sysTable)
throws IOException {
+ List<BackupInfo> backupInfos =
sysTable.getBackupInfos(BackupState.COMPLETE);
+ Collections.reverse(backupInfos); // Start from oldest
+
+ for (BackupInfo backupInfo : backupInfos) {
+ if (BackupType.FULL.equals(backupInfo.getType())) {
+ return backupInfo.getStartTs();
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Updates the start time for continuous backups if older than cutoff
timestamp.
+ * @param sysTable Backup system table
+ * @param cutoffTimestamp Timestamp before which WALs are no longer needed
+ */
+ private void updateBackupTableStartTimes(BackupSystemTable sysTable, long
cutoffTimestamp)
Review Comment:
Hey @vinayakphegde, this is the function that led me to ask for
clarification on why we need to update the start times of the continuous
backups. Maybe you could add another line or two to the docstring here that
elaborates on why we need to do this? That may make it more clear to others in
the future.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]