This is an automated email from the ASF dual-hosted git repository.

dieter pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 1586b292545 HBASE-29800 WAL logs are unprotected during first full 
backup
1586b292545 is described below

commit 1586b292545f96b0b76097acfda751523d5fab13
Author: Dieter De Paepe <[email protected]>
AuthorDate: Fri Feb 6 22:22:24 2026 +0100

    HBASE-29800 WAL logs are unprotected during first full backup
    
    The BackupLogCleaner prevents WAL files that are needed for future backups
    from being deleted. In the case where a backup root has a single running
    backup, there was a small timeframe where relevant files were unprotected
    because only completed backups were taken into consideration. This commit
    fixes this.
    
    The old mechanism relied on the "backup start code", which is a timestamp
    that denotes (per backup root) the lowest (earliest) log-roll timestamp that
occurred for the backup. Because this concept has no added value but is
complex to reason about, it is removed. Usages are replaced with equivalent
    behavior based on timestamps stored in the backup info. (The backup start
    codes were calculated in the same way, just stored separately.)
    
    Note that the backup start code calculation suffers from HBASE-29628
    (log-roll timestamps of decommissioned region servers are not cleaned up,
    causing the start code to be lower than it should be). That problem is
    still present in this commit.
---
 .../org/apache/hadoop/hbase/backup/BackupInfo.java |  7 +-
 .../hadoop/hbase/backup/impl/BackupManager.java    | 20 ------
 .../hbase/backup/impl/BackupSystemTable.java       | 65 ------------------
 .../hbase/backup/impl/FullTableBackupClient.java   | 19 +-----
 .../backup/impl/IncrementalBackupManager.java      | 29 +++-----
 .../backup/impl/IncrementalTableBackupClient.java  |  6 +-
 .../hbase/backup/master/BackupLogCleaner.java      | 43 +++++-------
 .../hadoop/hbase/backup/util/BackupBoundaries.java | 78 ++++++++++++++--------
 .../hadoop/hbase/backup/util/BackupUtils.java      | 42 +-----------
 .../apache/hadoop/hbase/backup/TestBackupBase.java | 18 +----
 .../hadoop/hbase/backup/TestBackupSystemTable.java |  9 ---
 .../hbase/backup/master/TestBackupLogCleaner.java  | 66 +++++++++++++++---
 12 files changed, 140 insertions(+), 262 deletions(-)

diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
index 39f903185a6..edf5a2b517f 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
@@ -148,8 +148,11 @@ public class BackupInfo implements Comparable<BackupInfo> {
   private List<String> incrBackupFileList;
 
   /**
-   * New region server log timestamps for table set after distributed log roll 
key - table name,
-   * value - map of RegionServer hostname -> last log rolled timestamp
+   * New region server log timestamps for table set after distributed log 
roll. The keys consist of
+   * all tables that are part of the backup chain of the backup root (not just 
the tables that were
+   * specified when creating the backup, which could be a subset). The value 
is a map of
+   * RegionServer hostname to the last log-roll timestamp, i.e. the point up 
to which logs are
+   * included in the backup.
    */
   private Map<TableName, Map<String, Long>> tableSetTimestampMap;
 
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index 1de665f749c..dff3cc27b00 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -336,26 +336,6 @@ public class BackupManager implements Closeable {
     systemTable.finishBackupExclusiveOperation();
   }
 
-  /**
-   * Read the last backup start code (timestamp) of last successful backup. 
Will return null if
-   * there is no startcode stored in backup system table or the value is of 
length 0. These two
-   * cases indicate there is no successful backup completed so far.
-   * @return the timestamp of a last successful backup
-   * @throws IOException exception
-   */
-  public String readBackupStartCode() throws IOException {
-    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
-  }
-
-  /**
-   * Write the start code (timestamp) to backup system table. If passed in 
null, then write 0 byte.
-   * @param startCode start code
-   * @throws IOException exception
-   */
-  public void writeBackupStartCode(Long startCode) throws IOException {
-    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
-  }
-
   /**
    * Get the RS log information after the last log roll from backup system 
table.
    * @return RS log info
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 2701f7d4945..56ce7b30c72 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -458,48 +458,6 @@ public final class BackupSystemTable implements Closeable {
     }
   }
 
-  /**
-   * Read the last backup start code (timestamp) of last successful backup. 
Will return null if
-   * there is no start code stored on hbase or the value is of length 0. These 
two cases indicate
-   * there is no successful backup completed so far.
-   * @param backupRoot directory path to backup destination
-   * @return the timestamp of last successful backup
-   * @throws IOException exception
-   */
-  public String readBackupStartCode(String backupRoot) throws IOException {
-    LOG.trace("read backup start code from backup system table");
-
-    try (Table table = connection.getTable(tableName)) {
-      Get get = createGetForStartCode(backupRoot);
-      Result res = table.get(get);
-      if (res.isEmpty()) {
-        return null;
-      }
-      Cell cell = res.listCells().get(0);
-      byte[] val = CellUtil.cloneValue(cell);
-      if (val.length == 0) {
-        return null;
-      }
-      return new String(val, StandardCharsets.UTF_8);
-    }
-  }
-
-  /**
-   * Write the start code (timestamp) to backup system table. If passed in 
null, then write 0 byte.
-   * @param startCode  start code
-   * @param backupRoot root directory path to backup
-   * @throws IOException exception
-   */
-  public void writeBackupStartCode(Long startCode, String backupRoot) throws 
IOException {
-    if (LOG.isTraceEnabled()) {
-      LOG.trace("write backup start code to backup system table " + startCode);
-    }
-    try (Table table = connection.getTable(tableName)) {
-      Put put = createPutForStartCode(startCode.toString(), backupRoot);
-      table.put(put);
-    }
-  }
-
   /**
    * Exclusive operations are: create, delete, merge
    * @throws IOException if a table operation fails or an active backup 
exclusive operation is
@@ -1117,29 +1075,6 @@ public final class BackupSystemTable implements 
Closeable {
     return cellToBackupInfo(cell);
   }
 
-  /**
-   * Creates Get operation to retrieve start code from backup system table
-   * @return get operation
-   * @throws IOException exception
-   */
-  private Get createGetForStartCode(String rootPath) throws IOException {
-    Get get = new Get(rowkey(START_CODE_ROW, rootPath));
-    get.addFamily(BackupSystemTable.META_FAMILY);
-    get.readVersions(1);
-    return get;
-  }
-
-  /**
-   * Creates Put operation to store start code to backup system table
-   * @return put operation
-   */
-  private Put createPutForStartCode(String startCode, String rootPath) {
-    Put put = new Put(rowkey(START_CODE_ROW, rootPath));
-    put.addColumn(BackupSystemTable.META_FAMILY, Bytes.toBytes("startcode"),
-      Bytes.toBytes(startCode));
-    return put;
-  }
-
   /**
    * Creates Get to retrieve incremental backup table set from backup system 
table
    * @return get operation
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
index 7fb7a576880..c180bdb60ad 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.backup.BackupRequest;
 import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
 import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
-import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -136,17 +135,7 @@ public class FullTableBackupClient extends 
TableBackupClient {
     try (Admin admin = conn.getAdmin()) {
       // Begin BACKUP
       beginBackup(backupManager, backupInfo);
-      String savedStartCode;
-      boolean firstBackup;
-      // do snapshot for full table backup
-
-      savedStartCode = backupManager.readBackupStartCode();
-      firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) 
== 0L;
-      if (firstBackup) {
-        // This is our first backup. Let's put some marker to system table so 
that we can hold the
-        // logs while we do the backup.
-        backupManager.writeBackupStartCode(0L);
-      }
+
       // We roll log here before we do the snapshot. It is possible there is 
duplicate data
       // in the log that is already in the snapshot. But if we do it after the 
snapshot, we
       // could have data loss.
@@ -194,15 +183,11 @@ public class FullTableBackupClient extends 
TableBackupClient {
       Map<TableName, Map<String, Long>> newTableSetTimestampMap =
         backupManager.readLogTimestampMap();
 
-      backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
-      Long newStartCode =
-        
BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
-      backupManager.writeBackupStartCode(newStartCode);
-
       backupManager.deleteBulkLoadedRows(
         
bulkLoadsToDelete.stream().map(BulkLoad::getRowKey).collect(Collectors.toList()));
 
       // backup complete
+      backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
       completeBackup(conn, backupInfo, BackupType.FULL, conf);
     } catch (Exception e) {
       failBackup(conn, backupInfo, backupManager, e, "Unexpected 
BackupException : ",
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalBackupManager.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalBackupManager.java
index c92c0747e83..57319367a9a 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalBackupManager.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalBackupManager.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.backup.impl;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -28,7 +29,6 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
@@ -62,23 +62,11 @@ public class IncrementalBackupManager extends BackupManager 
{
   public Map<String, Long> getIncrBackupLogFileMap() throws IOException {
     List<String> logList;
     Map<String, Long> newTimestamps;
-    Map<String, Long> previousTimestampMins;
+    Map<String, Long> previousTimestampMins =
+      BackupUtils.getRSLogTimestampMins(readLogTimestampMap());
 
-    String savedStartCode = readBackupStartCode();
-
-    // key: tableName
-    // value: <RegionServer,PreviousTimeStamp>
-    Map<TableName, Map<String, Long>> previousTimestampMap = 
readLogTimestampMap();
-
-    previousTimestampMins = 
BackupUtils.getRSLogTimestampMins(previousTimestampMap);
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("StartCode " + savedStartCode + "for backupID " + 
backupInfo.getBackupId());
-    }
     // get all new log files from .logs and .oldlogs after last TS and before 
new timestamp
-    if (
-      savedStartCode == null || previousTimestampMins == null || 
previousTimestampMins.isEmpty()
-    ) {
+    if (previousTimestampMins.isEmpty()) {
       throw new IOException("Cannot read any previous back up timestamps from 
backup system table. "
         + "In order to create an incremental backup, at least one full backup 
is needed.");
     }
@@ -93,7 +81,7 @@ public class IncrementalBackupManager extends BackupManager {
     }
     newTimestamps = readRegionServerLastLogRollResult();
 
-    logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, 
conf, savedStartCode);
+    logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, 
conf);
     logList = excludeProcV2WALs(logList);
     backupInfo.setIncrBackupFileList(logList);
 
@@ -121,16 +109,15 @@ public class IncrementalBackupManager extends 
BackupManager {
    * @param olderTimestamps  the timestamp for each region server of the last 
backup.
    * @param newestTimestamps the timestamp for each region server that the 
backup should lead to.
    * @param conf             the Hadoop and Hbase configuration
-   * @param savedStartCode   the startcode (timestamp) of last successful 
backup.
    * @return a list of log files to be backed up
    * @throws IOException exception
    */
   private List<String> getLogFilesForNewBackup(Map<String, Long> 
olderTimestamps,
-    Map<String, Long> newestTimestamps, Configuration conf, String 
savedStartCode)
-    throws IOException {
+    Map<String, Long> newestTimestamps, Configuration conf) throws IOException 
{
     LOG.debug("In getLogFilesForNewBackup()\n" + "olderTimestamps: " + 
olderTimestamps
       + "\n newestTimestamps: " + newestTimestamps);
 
+    long prevBackupStartTs = Collections.min(olderTimestamps.values());
     Path walRootDir = CommonFSUtils.getWALRootDir(conf);
     Path logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
     Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
@@ -227,7 +214,7 @@ public class IncrementalBackupManager extends BackupManager 
{
        * our last backup.
        */
       if (oldTimeStamp == null) {
-        if (currentLogTS < Long.parseLong(savedStartCode)) {
+        if (currentLogTS < prevBackupStartTs) {
           // This log file is really old, its region server was before our 
last backup.
           continue;
         } else {
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index e5599c8357c..56809cfb898 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -332,14 +332,10 @@ public class IncrementalTableBackupClient extends 
TableBackupClient {
       Map<TableName, Map<String, Long>> newTableSetTimestampMap =
         backupManager.readLogTimestampMap();
 
-      backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
-      Long newStartCode =
-        
BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
-      backupManager.writeBackupStartCode(newStartCode);
-
       List<BulkLoad> bulkLoads = handleBulkLoad(backupInfo.getTableNames());
 
       // backup complete
+      backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
       completeBackup(conn, backupInfo, BackupType.INCREMENTAL, conf);
 
       List<byte[]> bulkLoadedRows = Lists.transform(bulkLoads, 
BulkLoad::getRowKey);
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
index 81f22948c5b..45fe961ac25 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
@@ -17,12 +17,11 @@
  */
 package org.apache.hadoop.hbase.backup.master;
 
-import static org.apache.hadoop.hbase.backup.BackupInfo.withState;
-
 import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -31,7 +30,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
 import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
@@ -91,10 +89,11 @@ public class BackupLogCleaner extends 
BaseLogCleanerDelegate {
    * Calculates the timestamp boundary up to which all backup roots have 
already included the WAL.
    * I.e. WALs with a lower (= older) or equal timestamp are no longer needed 
for future incremental
    * backups.
+   * @param backups  all completed or running backups to use for the 
calculation of the boundary
+   * @param tsBuffer a buffer (in ms) to lower the boundary for the default 
bound
    */
-  private BackupBoundaries serverToPreservationBoundaryTs(BackupSystemTable 
sysTable)
-    throws IOException {
-    List<BackupInfo> backups = 
sysTable.getBackupHistory(withState(BackupState.COMPLETE));
+  protected static BackupBoundaries 
calculatePreservationBoundary(List<BackupInfo> backups,
+    long tsBuffer) {
     if (LOG.isDebugEnabled()) {
       LOG.debug(
         "Cleaning WALs if they are older than the WAL cleanup time-boundary. "
@@ -103,11 +102,12 @@ public class BackupLogCleaner extends 
BaseLogCleanerDelegate {
         
backups.stream().map(BackupInfo::getBackupId).sorted().collect(Collectors.joining(",
 ")));
     }
 
-    // This map tracks, for every backup root, the most recent created backup 
(= highest timestamp)
+    // This map tracks, for every backup root, the most recent (= highest 
timestamp) completed
+    // backup, or if there is no such one, the currently running backup (if 
any)
     Map<String, BackupInfo> newestBackupPerRootDir = new HashMap<>();
     for (BackupInfo backup : backups) {
       BackupInfo existingEntry = 
newestBackupPerRootDir.get(backup.getBackupRootDir());
-      if (existingEntry == null || existingEntry.getStartTs() < 
backup.getStartTs()) {
+      if (existingEntry == null || existingEntry.getState() == 
BackupState.RUNNING) {
         newestBackupPerRootDir.put(backup.getBackupRootDir(), backup);
       }
     }
@@ -119,24 +119,12 @@ public class BackupLogCleaner extends 
BaseLogCleanerDelegate {
           .collect(Collectors.joining(", ")));
     }
 
-    BackupBoundaries.BackupBoundariesBuilder builder =
-      BackupBoundaries.builder(getConf().getLong(TS_BUFFER_KEY, 
TS_BUFFER_DEFAULT));
-    for (BackupInfo backupInfo : newestBackupPerRootDir.values()) {
-      long startCode = 
Long.parseLong(sysTable.readBackupStartCode(backupInfo.getBackupRootDir()));
-      // Iterate over all tables in the timestamp map, which contains all 
tables covered in the
-      // backup root, not just the tables included in that specific backup 
(which could be a subset)
-      for (TableName table : backupInfo.getTableSetTimestampMap().keySet()) {
-        for (Map.Entry<String, Long> entry : 
backupInfo.getTableSetTimestampMap().get(table)
-          .entrySet()) {
-          builder.addBackupTimestamps(entry.getKey(), entry.getValue(), 
startCode);
-        }
-      }
-    }
-
+    BackupBoundaries.BackupBoundariesBuilder builder = 
BackupBoundaries.builder(tsBuffer);
+    newestBackupPerRootDir.values().forEach(builder::update);
     BackupBoundaries boundaries = builder.build();
 
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Boundaries oldestStartCode: {}", 
boundaries.getOldestStartCode());
+      LOG.debug("Boundaries defaultBoundary: {}", 
boundaries.getDefaultBoundary());
       for (Map.Entry<Address, Long> entry : 
boundaries.getBoundaries().entrySet()) {
         LOG.debug("Server: {}, WAL cleanup boundary: {}", 
entry.getKey().getHostName(),
           entry.getValue());
@@ -159,10 +147,11 @@ public class BackupLogCleaner extends 
BaseLogCleanerDelegate {
     }
 
     BackupBoundaries boundaries;
-    try {
-      try (BackupSystemTable sysTable = new BackupSystemTable(conn)) {
-        boundaries = serverToPreservationBoundaryTs(sysTable);
-      }
+    try (BackupSystemTable sysTable = new BackupSystemTable(conn)) {
+      long tsBuffer = getConf().getLong(TS_BUFFER_KEY, TS_BUFFER_DEFAULT);
+      List<BackupInfo> backupHistory = sysTable.getBackupHistory(
+        i -> EnumSet.of(BackupState.COMPLETE, 
BackupState.RUNNING).contains(i.getState()));
+      boundaries = calculatePreservationBoundary(backupHistory, tsBuffer);
     } catch (IOException ex) {
       LOG.error("Failed to analyse backup history with exception: {}. 
Retaining all logs",
         ex.getMessage(), ex);
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupBoundaries.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupBoundaries.java
index b38c1bdb68d..6853fee7815 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupBoundaries.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupBoundaries.java
@@ -21,6 +21,8 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.net.Address;
 import org.apache.hadoop.hbase.wal.WAL;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -35,22 +37,19 @@ import org.slf4j.LoggerFactory;
 @InterfaceAudience.Private
 public class BackupBoundaries {
   private static final Logger LOG = 
LoggerFactory.getLogger(BackupBoundaries.class);
-  private static final BackupBoundaries EMPTY_BOUNDARIES =
-    new BackupBoundaries(Collections.emptyMap(), Long.MAX_VALUE);
 
   // This map tracks, for every RegionServer, the least recent (= oldest / 
lowest timestamp)
   // inclusion in any backup. In other words, it is the timestamp boundary up 
to which all backup
   // roots have included the WAL in their backup.
   private final Map<Address, Long> boundaries;
 
-  // The minimum WAL roll timestamp from the most recent backup of each backup 
root, used as a
-  // fallback cleanup boundary for RegionServers without explicit backup 
boundaries (e.g., servers
-  // that joined after backups began)
-  private final long oldestStartCode;
+  // The fallback cleanup boundary for RegionServers without explicit backup 
boundaries
+  // (e.g., servers that joined after backups began can be checked against 
this boundary)
+  private final long defaultBoundary;
 
-  private BackupBoundaries(Map<Address, Long> boundaries, long 
oldestStartCode) {
+  private BackupBoundaries(Map<Address, Long> boundaries, long 
defaultBoundary) {
     this.boundaries = boundaries;
-    this.oldestStartCode = oldestStartCode;
+    this.defaultBoundary = defaultBoundary;
   }
 
   public boolean isDeletable(Path walLogPath) {
@@ -68,11 +67,11 @@ public class BackupBoundaries {
       long pathTs = WAL.getTimestamp(walLogPath.getName());
 
       if (!boundaries.containsKey(address)) {
-        boolean isDeletable = pathTs <= oldestStartCode;
+        boolean isDeletable = pathTs <= defaultBoundary;
         if (LOG.isDebugEnabled()) {
           LOG.debug(
-            "Boundary for {} not found. isDeletable = {} based on 
oldestStartCode = {} and WAL ts of {}",
-            walLogPath, isDeletable, oldestStartCode, pathTs);
+            "Boundary for {} not found. isDeletable = {} based on 
defaultBoundary = {} and WAL ts of {}",
+            walLogPath, isDeletable, defaultBoundary, pathTs);
         }
         return isDeletable;
       }
@@ -104,8 +103,8 @@ public class BackupBoundaries {
     return boundaries;
   }
 
-  public long getOldestStartCode() {
-    return oldestStartCode;
+  public long getDefaultBoundary() {
+    return defaultBoundary;
   }
 
   public static BackupBoundariesBuilder builder(long tsCleanupBuffer) {
@@ -116,34 +115,55 @@ public class BackupBoundaries {
     private final Map<Address, Long> boundaries = new HashMap<>();
     private final long tsCleanupBuffer;
 
-    private long oldestStartCode = Long.MAX_VALUE;
+    private long oldestStartTs = Long.MAX_VALUE;
 
     private BackupBoundariesBuilder(long tsCleanupBuffer) {
       this.tsCleanupBuffer = tsCleanupBuffer;
     }
 
-    public BackupBoundariesBuilder addBackupTimestamps(String host, long 
hostLogRollTs,
-      long backupStartCode) {
-      Address address = Address.fromString(host);
-      Long storedTs = boundaries.get(address);
-      if (storedTs == null || hostLogRollTs < storedTs) {
-        boundaries.put(address, hostLogRollTs);
+    /**
+     * Updates the boundaries based on the provided backup info.
+     * @param backupInfo the most recent completed backup info for a backup 
root, or if there is no
+     *                   such completed backup, the currently running backup.
+     */
+    public void update(BackupInfo backupInfo) {
+      switch (backupInfo.getState()) {
+        case COMPLETE:
+          // If a completed backup exists in the backup root, we want to 
protect all logs that
+          // have been created since the log-roll that happened for that 
backup.
+          for (TableName table : 
backupInfo.getTableSetTimestampMap().keySet()) {
+            for (Map.Entry<String, Long> entry : 
backupInfo.getTableSetTimestampMap().get(table)
+              .entrySet()) {
+              Address regionServerAddress = Address.fromString(entry.getKey());
+              Long logRollTs = entry.getValue();
+
+              Long storedTs = boundaries.get(regionServerAddress);
+              if (storedTs == null || logRollTs < storedTs) {
+                boundaries.put(regionServerAddress, logRollTs);
+              }
+            }
+          }
+          break;
+        case RUNNING:
+          // If there is NO completed backup in the backup root, there are no 
persisted log-roll
+          // timestamps available yet. But, we still want to protect all files 
that have been
+          // created since the start of the currently running backup.
+          oldestStartTs = Math.min(oldestStartTs, backupInfo.getStartTs());
+          break;
+        default:
+          throw new IllegalStateException("Unexpected backupInfo state: " + 
backupInfo.getState());
       }
-
-      if (oldestStartCode > backupStartCode) {
-        oldestStartCode = backupStartCode;
-      }
-
-      return this;
     }
 
     public BackupBoundaries build() {
       if (boundaries.isEmpty()) {
-        return EMPTY_BOUNDARIES;
+        long defaultBoundary = oldestStartTs - tsCleanupBuffer;
+        return new BackupBoundaries(Collections.emptyMap(), defaultBoundary);
       }
 
-      oldestStartCode -= tsCleanupBuffer;
-      return new BackupBoundaries(boundaries, oldestStartCode);
+      long oldestRollTs = Collections.min(boundaries.values());
+      long defaultBoundary = Math.min(oldestRollTs, oldestStartTs) - 
tsCleanupBuffer;
+      return new BackupBoundaries(boundaries, defaultBoundary);
     }
   }
 }
diff --git 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
index b108ffca0a5..c7a6b149a87 100644
--- 
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
+++ 
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
@@ -90,30 +90,8 @@ public final class BackupUtils {
    */
   public static Map<String, Long>
     getRSLogTimestampMins(Map<TableName, Map<String, Long>> rsLogTimestampMap) 
{
-    if (rsLogTimestampMap == null || rsLogTimestampMap.isEmpty()) {
-      return null;
-    }
-
-    HashMap<String, Long> rsLogTimestampMins = new HashMap<>();
-    HashMap<String, HashMap<TableName, Long>> rsLogTimestampMapByRS = new 
HashMap<>();
-
-    for (Entry<TableName, Map<String, Long>> tableEntry : 
rsLogTimestampMap.entrySet()) {
-      TableName table = tableEntry.getKey();
-      Map<String, Long> rsLogTimestamp = tableEntry.getValue();
-      for (Entry<String, Long> rsEntry : rsLogTimestamp.entrySet()) {
-        String rs = rsEntry.getKey();
-        Long ts = rsEntry.getValue();
-        rsLogTimestampMapByRS.putIfAbsent(rs, new HashMap<>());
-        rsLogTimestampMapByRS.get(rs).put(table, ts);
-      }
-    }
-
-    for (Entry<String, HashMap<TableName, Long>> entry : 
rsLogTimestampMapByRS.entrySet()) {
-      String rs = entry.getKey();
-      rsLogTimestampMins.put(rs, BackupUtils.getMinValue(entry.getValue()));
-    }
-
-    return rsLogTimestampMins;
+    return rsLogTimestampMap.values().stream().flatMap(map -> 
map.entrySet().stream())
+      .collect(Collectors.toMap(Entry::getKey, Entry::getValue, Math::min));
   }
 
   /**
@@ -340,22 +318,6 @@ public final class BackupUtils {
     }
   }
 
-  /**
-   * Get the min value for all the Values a map.
-   * @param map map
-   * @return the min value
-   */
-  public static <T> Long getMinValue(Map<T, Long> map) {
-    Long minTimestamp = null;
-    if (map != null) {
-      ArrayList<Long> timestampList = new ArrayList<>(map.values());
-      Collections.sort(timestampList);
-      // The min among all the RS log timestamps will be kept in backup system 
table table.
-      minTimestamp = timestampList.get(0);
-    }
-    return minTimestamp;
-  }
-
   /**
    * Parses host name:port from archived WAL path
    * @param p path
diff --git 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
index fa61970ad72..b1eb264ecdb 100644
--- 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
+++ 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
@@ -187,14 +187,11 @@ public class TestBackupBase {
         Map<TableName, Map<String, Long>> newTableSetTimestampMap =
           backupManager.readLogTimestampMap();
 
-        Long newStartCode =
-          
BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
-        backupManager.writeBackupStartCode(newStartCode);
-
         handleBulkLoad(backupInfo.getTableNames());
         failStageIf(Stage.stage_4);
 
         // backup complete
+        backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
         completeBackup(conn, backupInfo, BackupType.INCREMENTAL, conf);
 
       } catch (Exception e) {
@@ -221,16 +218,7 @@ public class TestBackupBase {
         // Begin BACKUP
         beginBackup(backupManager, backupInfo);
         failStageIf(Stage.stage_0);
-        String savedStartCode;
-        boolean firstBackup;
         // do snapshot for full table backup
-        savedStartCode = backupManager.readBackupStartCode();
-        firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) 
== 0L;
-        if (firstBackup) {
-          // This is our first backup. Let's put some marker to system table 
so that we can hold the
-          // logs while we do the backup.
-          backupManager.writeBackupStartCode(0L);
-        }
         failStageIf(Stage.stage_1);
         // We roll log here before we do the snapshot. It is possible there is 
duplicate data
         // in the log that is already in the snapshot. But if we do it after 
the snapshot, we
@@ -274,11 +262,9 @@ public class TestBackupBase {
         Map<TableName, Map<String, Long>> newTableSetTimestampMap =
           backupManager.readLogTimestampMap();
 
-        Long newStartCode =
-          
BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
-        backupManager.writeBackupStartCode(newStartCode);
         failStageIf(Stage.stage_4);
         // backup complete
+        backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
         completeBackup(conn, backupInfo, BackupType.FULL, conf);
 
       } catch (Exception e) {
diff --git 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java
 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java
index 1f7444931f5..039669e2a46 100644
--- 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java
+++ 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java
@@ -101,15 +101,6 @@ public class TestBackupSystemTable {
     cleanBackupTable();
   }
 
-  @Test
-  public void testWriteReadBackupStartCode() throws IOException {
-    long code = 100L;
-    table.writeBackupStartCode(code, "root");
-    String readCode = table.readBackupStartCode("root");
-    assertEquals(code, Long.parseLong(readCode));
-    cleanBackupTable();
-  }
-
   private void cleanBackupTable() throws IOException {
     Admin admin = UTIL.getAdmin();
     admin.disableTable(BackupSystemTable.getTableName(conf));
diff --git 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
index 1c87cfd75c7..3eaa1c633cd 100644
--- 
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
+++ 
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -35,11 +36,11 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.backup.TestBackupBase;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
 import org.apache.hadoop.hbase.backup.util.BackupBoundaries;
-import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
@@ -223,10 +224,6 @@ public class TestBackupLogCleaner extends TestBackupBase {
       List<FileStatus> walsAfterB1 = 
getListOfWALFiles(TEST_UTIL.getConfiguration());
       LOG.info("WALs after B1: {}", walsAfterB1.size());
 
-      String startCodeStr = 
systemTable.readBackupStartCode(backupRoot1.toString());
-      long b1StartCode = Long.parseLong(startCodeStr);
-      LOG.info("B1 startCode: {}", b1StartCode);
-
       // Add a new RegionServer to the cluster
       LOG.info("Adding new RegionServer to cluster");
       rsThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
@@ -295,13 +292,15 @@ public class TestBackupLogCleaner extends TestBackupBase {
 
   @Test
   public void testCanDeleteFileWithNewServerWALs() {
-    long backupStartCode = 1000000L;
+    BackupInfo backup = new BackupInfo();
+    backup.setState(BackupInfo.BackupState.COMPLETE);
+    
backup.setTableSetTimestampMap(Collections.singletonMap(TableName.valueOf("table1"),
+      Collections.singletonMap("server1:60020", 1000000L)));
+    BackupBoundaries boundaries =
+      
BackupLogCleaner.calculatePreservationBoundary(Collections.singletonList(backup),
 0L);
+
     // Old WAL from before the backup
     Path oldWAL = new Path("/hbase/oldWALs/server1%2C60020%2C12345.500000");
-    String host = BackupUtils.parseHostNameFromLogFile(oldWAL);
-    BackupBoundaries boundaries = BackupBoundaries.builder(0L)
-      .addBackupTimestamps(host, backupStartCode, backupStartCode).build();
-
     assertTrue(BackupLogCleaner.canDeleteFile(boundaries, oldWAL),
       "WAL older than backup should be deletable");
 
@@ -310,12 +309,57 @@ public class TestBackupLogCleaner extends TestBackupBase {
     assertTrue(BackupLogCleaner.canDeleteFile(boundaries, boundaryWAL),
       "WAL at boundary should be deletable");
 
-    // WAL from a server that joined AFTER the backup
+    // WAL created after the backup boundary
+    Path newWal = new Path("/hbase/oldWALs/server1%2C60020%2C12345.1500000");
+    assertFalse(BackupLogCleaner.canDeleteFile(boundaries, newWal),
+      "WAL newer than backup should not be deletable");
+
+    // WAL from a new server that joined AFTER the backup
     Path newServerWAL = new 
Path("/hbase/oldWALs/newserver%2C60020%2C99999.1500000");
     assertFalse(BackupLogCleaner.canDeleteFile(boundaries, newServerWAL),
       "WAL from new server (after backup) should NOT be deletable");
   }
 
+  @Test
+  public void testFirstBackupProtectsFiles() {
+    BackupInfo backup = new BackupInfo();
+    backup.setBackupId("backup_1");
+    backup.setState(BackupInfo.BackupState.RUNNING);
+    backup.setStartTs(100L);
+    // Running backups have no TableSetTimestampMap
+
+    BackupBoundaries boundaries =
+      
BackupLogCleaner.calculatePreservationBoundary(Collections.singletonList(backup),
 5L);
+
+    // There's only a single backup, and it is still running, so it's a FULL 
backup.
+    // We expect files preceding the snapshot are deletable, but files after 
the start are not.
+    // Because this is not region-server-specific, the buffer is taken into 
account.
+    Path path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.94");
+    assertTrue(BackupLogCleaner.canDeleteFile(boundaries, path));
+    path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.95");
+    assertTrue(BackupLogCleaner.canDeleteFile(boundaries, path));
+    path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.96");
+    assertFalse(BackupLogCleaner.canDeleteFile(boundaries, path));
+
+    // If there is an already completed backup in the same root, only that one 
matters.
+    // In this case, a region-server-specific timestamp is available, so the 
buffer is not used.
+    BackupInfo backup2 = new BackupInfo();
+    backup2.setBackupId("backup_2");
+    backup2.setState(BackupInfo.BackupState.COMPLETE);
+    backup2.setStartTs(80L);
+    
backup2.setTableSetTimestampMap(Collections.singletonMap(TableName.valueOf("table1"),
+      Collections.singletonMap("server1:60020", 90L)));
+
+    boundaries = 
BackupLogCleaner.calculatePreservationBoundary(Arrays.asList(backup, backup2), 
5L);
+
+    path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.89");
+    assertTrue(BackupLogCleaner.canDeleteFile(boundaries, path));
+    path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.90");
+    assertTrue(BackupLogCleaner.canDeleteFile(boundaries, path));
+    path = new Path("/hbase/oldWALs/server1%2C60020%2C12345.91");
+    assertFalse(BackupLogCleaner.canDeleteFile(boundaries, path));
+  }
+
   @Test
   public void testCleansUpHMasterWal() {
     Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");


Reply via email to