This is an automated email from the ASF dual-hosted git repository.
ndimiduk pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new d18a97f4634 HBASE-28680 BackupLogCleaner causes HMaster WALs to pile
up indefinitely (#6006)
d18a97f4634 is described below
commit d18a97f4634a72cebd9d8c1eb4e3079fad83b686
Author: Ray Mattingly <[email protected]>
AuthorDate: Fri Jun 21 09:17:54 2024 -0400
HBASE-28680 BackupLogCleaner causes HMaster WALs to pile up indefinitely
(#6006)
We have been trying to set up daily incremental backups for hundreds of
clusters at my day
job. Recently we discovered that old WALs were piling up across many
clusters in line with when we
began running incremental backups.
This led to the realization that the BackupLogCleaner will always skip
archived HMaster WALs. This
is a problem because, if a cleaner is skipping a given file, then the
CleanerChore will never
delete it.
This seems like a misunderstanding of what it means to "skip" a WAL in a
BaseLogCleanerDelegate,
and, instead, we should always return these HMaster WALs as deletable from
the perspective of the
BackupLogCleaner. We could subject them to the same scrutiny as
RegionServer WALs: are they older
than the most recent successful backup? But, if I understand correctly,
HMaster WALs do not
contain any data relevant to table backups, so that would be unnecessary.
Co-authored-by: Ray Mattingly <[email protected]>
Signed-off-by: Nick Dimiduk <[email protected]>
---
.../hbase/backup/master/BackupLogCleaner.java | 58 ++++++++++++++--------
.../hbase/backup/master/TestBackupLogCleaner.java | 19 +++++++
2 files changed, 57 insertions(+), 20 deletions(-)
diff --git
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
index 391c7d9b191..0eac3d617b0 100644
---
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
+++
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate;
+import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.wal.WAL;
@@ -123,27 +125,8 @@ public class BackupLogCleaner extends
BaseLogCleanerDelegate {
return Collections.emptyList();
}
for (FileStatus file : files) {
- String fn = file.getPath().getName();
- if (fn.startsWith(WALProcedureStore.LOG_PREFIX)) {
+ if (canDeleteFile(addressToLastBackupMap, file.getPath())) {
filteredFiles.add(file);
- continue;
- }
-
- try {
- Address walServerAddress =
-
Address.fromString(BackupUtils.parseHostNameFromLogFile(file.getPath()));
- long walTimestamp = WAL.getTimestamp(file.getPath().getName());
-
- if (
- !addressToLastBackupMap.containsKey(walServerAddress)
- || addressToLastBackupMap.get(walServerAddress) >= walTimestamp
- ) {
- filteredFiles.add(file);
- }
- } catch (Exception ex) {
- LOG.warn(
- "Error occurred while filtering file: {} with error: {}. Ignoring
cleanup of this log",
- file.getPath(), ex.getMessage());
}
}
@@ -176,4 +159,39 @@ public class BackupLogCleaner extends
BaseLogCleanerDelegate {
public boolean isStopped() {
return this.stopped;
}
+
+ protected static boolean canDeleteFile(Map<Address, Long>
addressToLastBackupMap, Path path) {
+ if (isHMasterWAL(path)) {
+ return true;
+ }
+
+ try {
+ String hostname = BackupUtils.parseHostNameFromLogFile(path);
+ if (hostname == null) {
+ LOG.warn(
+ "Cannot parse hostname from RegionServer WAL file: {}. Ignoring
cleanup of this log",
+ path);
+ return false;
+ }
+ Address walServerAddress = Address.fromString(hostname);
+ long walTimestamp = WAL.getTimestamp(path.getName());
+
+ if (
+ !addressToLastBackupMap.containsKey(walServerAddress)
+ || addressToLastBackupMap.get(walServerAddress) >= walTimestamp
+ ) {
+ return true;
+ }
+ } catch (Exception ex) {
+ LOG.warn("Error occurred while filtering file: {}. Ignoring cleanup of
this log", path, ex);
+ return false;
+ }
+ return false;
+ }
+
+ private static boolean isHMasterWAL(Path path) {
+ String fn = path.getName();
+ return fn.startsWith(WALProcedureStore.LOG_PREFIX)
+ || fn.endsWith(MasterRegionFactory.ARCHIVED_WAL_SUFFIX);
+ }
}
diff --git
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
index 2b0f9c0cba5..e372c6ad153 100644
---
a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
+++
b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
@@ -20,10 +20,12 @@ package org.apache.hadoop.hbase.backup.master;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupType;
@@ -132,4 +134,21 @@ public class TestBackupLogCleaner extends TestBackupBase {
conn.close();
}
}
+
+ @Test
+ public void testCleansUpHMasterWal() {
+ Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");
+ assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(), path));
+ }
+
+ @Test
+ public void testCleansUpArchivedHMasterWal() {
+ Path normalPath =
+ new
Path("/hbase/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
+ assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(),
normalPath));
+
+ Path masterPath = new Path(
+
"/hbase/MasterData/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
+ assertTrue(BackupLogCleaner.canDeleteFile(Collections.emptyMap(),
masterPath));
+ }
}