This is an automated email from the ASF dual-hosted git repository.

ndimiduk pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new d18a97f4634 HBASE-28680 BackupLogCleaner causes HMaster WALs to pile up indefinitely (#6006)
d18a97f4634 is described below

commit d18a97f4634a72cebd9d8c1eb4e3079fad83b686
Author: Ray Mattingly <[email protected]>
AuthorDate: Fri Jun 21 09:17:54 2024 -0400

    HBASE-28680 BackupLogCleaner causes HMaster WALs to pile up indefinitely 
(#6006)
    
    We have been trying to setup daily incremental backups for hundreds of 
clusters at my day
    job. Recently we discovered that old WALs were piling up across many 
clusters inline with when we
    began running incremental backups.
    
    This led to the realization that the BackupLogCleaner will always skip 
archived HMaster WALs. This
    is a problem because, if a cleaner is skipping a given file, then the 
CleanerChore will never
    delete it.
    
    This seems like a misunderstanding of what it means to "skip" a WAL in a 
BaseLogCleanerDelegate,
    and, instead, we should always return these HMaster WALs as deletable from 
the perspective of the
    BackupLogCleaner. We could subject them to the same scrutiny as 
RegionServer WALs: are they older
    than the most recent successful backup? But, if I understand correctly, 
HMaster WALs do not
    contain any data relevant to table backups, so that would be unnecessary.
    
    Co-authored-by: Ray Mattingly <[email protected]>
    Signed-off-by: Nick Dimiduk <[email protected]>
---
 .../hbase/backup/master/BackupLogCleaner.java      | 58 ++++++++++++++--------
 .../hbase/backup/master/TestBackupLogCleaner.java  | 19 +++++++
 2 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
index 391c7d9b191..0eac3d617b0 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate;
+import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
 import org.apache.hadoop.hbase.net.Address;
 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
 import org.apache.hadoop.hbase.wal.WAL;
@@ -123,27 +125,8 @@ public class BackupLogCleaner extends BaseLogCleanerDelegate {
       return Collections.emptyList();
     }
     for (FileStatus file : files) {
-      String fn = file.getPath().getName();
-      if (fn.startsWith(WALProcedureStore.LOG_PREFIX)) {
+      if (canDeleteFile(addressToLastBackupMap, file.getPath())) {
         filteredFiles.add(file);
-        continue;
-      }
-
-      try {
-        Address walServerAddress =
-          Address.fromString(BackupUtils.parseHostNameFromLogFile(file.getPath()));
-        long walTimestamp = WAL.getTimestamp(file.getPath().getName());
-
-        if (
-          !addressToLastBackupMap.containsKey(walServerAddress)
-            || addressToLastBackupMap.get(walServerAddress) >= walTimestamp
-        ) {
-          filteredFiles.add(file);
-        }
-      } catch (Exception ex) {
-        LOG.warn(
-          "Error occurred while filtering file: {} with error: {}. Ignoring cleanup of this log",
-          file.getPath(), ex.getMessage());
       }
     }
 
@@ -176,4 +159,39 @@ public class BackupLogCleaner extends BaseLogCleanerDelegate {
   public boolean isStopped() {
     return this.stopped;
   }
+
+  protected static boolean canDeleteFile(Map<Address, Long> addressToLastBackupMap, Path path) {
+    if (isHMasterWAL(path)) {
+      return true;
+    }
+
+    try {
+      String hostname = BackupUtils.parseHostNameFromLogFile(path);
+      if (hostname == null) {
+        LOG.warn(
+          "Cannot parse hostname from RegionServer WAL file: {}. Ignoring cleanup of this log",
+          path);
+        return false;
+      }
+      Address walServerAddress = Address.fromString(hostname);
+      long walTimestamp = WAL.getTimestamp(path.getName());
+
+      if (
+        !addressToLastBackupMap.containsKey(walServerAddress)
+          || addressToLastBackupMap.get(walServerAddress) >= walTimestamp
+      ) {
+        return true;
+      }
+    } catch (Exception ex) {
+      LOG.warn("Error occurred while filtering file: {}. Ignoring cleanup of this log", path, ex);
+      return false;
+    }
+    return false;
+  }
+
+  private static boolean isHMasterWAL(Path path) {
+    String fn = path.getName();
+    return fn.startsWith(WALProcedureStore.LOG_PREFIX)
+      || fn.endsWith(MasterRegionFactory.ARCHIVED_WAL_SUFFIX);
+  }
 }
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
index 2b0f9c0cba5..e372c6ad153 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
@@ -20,10 +20,12 @@ package org.apache.hadoop.hbase.backup.master;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupType;
@@ -132,4 +134,21 @@ public class TestBackupLogCleaner extends TestBackupBase {
       conn.close();
     }
   }
+
+  @Test
+  public void testCleansUpHMasterWal() {
+    Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");
+    assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(), path));
+  }
+
+  @Test
+  public void testCleansUpArchivedHMasterWal() {
+    Path normalPath =
+      new Path("/hbase/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
+    assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(), normalPath));
+
+    Path masterPath = new Path(
+      "/hbase/MasterData/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
+    assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(), masterPath));
+  }
 }

Reply via email to