This is an automated email from the ASF dual-hosted git repository.

ndimiduk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 64c582fe9a8 HBASE-29029 Refactor BackupHFileCleaner + fix test (#6533)
64c582fe9a8 is described below

commit 64c582fe9a846b2f5e07b180cc6fae77431d726e
Author: DieterDP <90392398+dieterdp...@users.noreply.github.com>
AuthorDate: Fri Jun 13 15:41:51 2025 +0200

    HBASE-29029 Refactor BackupHFileCleaner + fix test (#6533)
    
    The TestBackupHFileCleaner test was broken: it registered bulk loads
    through a different API than the one actually used during backups.
    The test also incorrectly closed the FileSystem of the HBaseTestingUtil,
    causing the test to block for about 5 minutes during shutdown.
    
    Both the test and BackupHFileCleaner itself were overly convoluted
    and have been cleaned up. Methods in BackupSystemTable that could lead
    to incorrect use have been removed or deprecated (their remaining use
    case will be addressed in HBASE-28715).
    
    Signed-off-by: Nick Dimiduk <ndimi...@apache.org>
---
 hbase-backup/pom.xml                               |   4 +
 .../hadoop/hbase/backup/BackupHFileCleaner.java    | 123 ++++++-------
 .../hbase/backup/impl/BackupSystemTable.java       | 190 ++++++---------------
 .../hbase/backup/TestBackupHFileCleaner.java       | 114 ++++++-------
 4 files changed, 160 insertions(+), 271 deletions(-)
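
The refactor's key testability change, visible in the patch below, is that BackupHFileCleaner now exposes a protected fetchFullyBackedUpTables(BackupSystemTable) hook instead of the old package-private setCheckForFullyBackedUpTables flag. A minimal sketch of how a test can use the hook, assuming conf, tableNameWithBackup and file1 are set up as in the updated TestBackupHFileCleaner in this patch (the snippet is illustrative, not part of the commit):

    // Override the hook so the cleaner does not read real backup metadata.
    BackupHFileCleaner cleaner = new BackupHFileCleaner() {
      @Override
      protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) {
        return Set.of(tableNameWithBackup); // pretend this table was fully backed up
      }
    };
    cleaner.setConf(conf);
    // The first call never marks anything deletable (timestamp guard); later calls
    // filter out HFiles whose names are registered as bulk loads for backed-up tables.
    Iterable<FileStatus> deletable = cleaner.getDeletableFiles(List.of(file1));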

diff --git a/hbase-backup/pom.xml b/hbase-backup/pom.xml
index 24c4ca4390f..9ae40d6eec4 100644
--- a/hbase-backup/pom.xml
+++ b/hbase-backup/pom.xml
@@ -112,6 +112,10 @@
       <scope>test</scope>
     </dependency>
     <!-- General dependencies -->
+    <dependency>
+      <groupId>com.github.stephenc.findbugs</groupId>
+      <artifactId>findbugs-annotations</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
index d7474997412..c9a76bef289 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
@@ -18,11 +18,8 @@
 package org.apache.hadoop.hbase.backup;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -31,6 +28,7 @@ import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.BulkLoad;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
@@ -42,106 +40,81 @@ import org.slf4j.LoggerFactory;
 import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
 
 /**
- * Implementation of a file cleaner that checks if an hfile is still referenced by backup before
- * deleting it from hfile archive directory.
+ * File cleaner that prevents deletion of HFiles that are still required by future incremental
+ * backups.
+ * <p>
+ * Bulk loaded HFiles that are needed by future updates are stored in the backup system table.
  */
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
 public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
   private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class);
+
   private boolean stopped = false;
-  private boolean aborted;
-  private Configuration conf;
+  private boolean aborted = false;
   private Connection connection;
-  private long prevReadFromBackupTbl = 0, // timestamp of most recent read from backup:system table
-      secondPrevReadFromBackupTbl = 0; // timestamp of 2nd most recent read from backup:system table
-  // used by unit test to skip reading backup:system
-  private boolean checkForFullyBackedUpTables = true;
-  private List<TableName> fullyBackedUpTables = null;
-
-  private Set<String> getFilenameFromBulkLoad(Map<byte[], List<Path>>[] maps) {
-    Set<String> filenames = new HashSet<>();
-    for (Map<byte[], List<Path>> map : maps) {
-      if (map == null) {
-        continue;
-      }
-
-      for (List<Path> paths : map.values()) {
-        for (Path p : paths) {
-          filenames.add(p.getName());
-        }
-      }
-    }
-    return filenames;
-  }
-
-  private Set<String> loadHFileRefs(List<TableName> tableList) throws IOException {
-    if (connection == null) {
-      connection = ConnectionFactory.createConnection(conf);
-    }
-    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
-      Map<byte[], List<Path>>[] res = tbl.readBulkLoadedFiles(null, tableList);
-      secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
-      prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
-      return getFilenameFromBulkLoad(res);
-    }
-  }
-
-  @InterfaceAudience.Private
-  void setCheckForFullyBackedUpTables(boolean b) {
-    checkForFullyBackedUpTables = b;
-  }
+  // timestamp of most recent read from backup system table
+  private long prevReadFromBackupTbl = 0;
+  // timestamp of 2nd most recent read from backup system table
+  private long secondPrevReadFromBackupTbl = 0;
 
   @Override
   public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
-    if (conf == null) {
-      return files;
+    if (stopped) {
+      return Collections.emptyList();
     }
-    // obtain the Set of TableName's which have been fully backed up
-    // so that we filter BulkLoad to be returned from server
-    if (checkForFullyBackedUpTables) {
-      if (connection == null) {
-        return files;
-      }
 
-      try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
-        fullyBackedUpTables = new ArrayList<>(tbl.getTablesIncludedInBackups());
-      } catch (IOException ioe) {
-        LOG.error("Failed to get tables which have been fully backed up, skipping checking", ioe);
-        return Collections.emptyList();
+    // We use filenames because the HFile will have been moved to the archive since it
+    // was registered.
+    final Set<String> hfileFilenames = new HashSet<>();
+    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
+      Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl);
+      for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) {
+        hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName());
       }
-      Collections.sort(fullyBackedUpTables);
-    }
-    final Set<String> hfileRefs;
-    try {
-      hfileRefs = loadHFileRefs(fullyBackedUpTables);
+      LOG.debug("Found {} unique HFile filenames registered as bulk loads.", 
hfileFilenames.size());
     } catch (IOException ioe) {
-      LOG.error("Failed to read hfile references, skipping checking deletable 
files", ioe);
+      LOG.error(
+        "Failed to read registered bulk load references from backup system 
table, marking all files as non-deletable.",
+        ioe);
       return Collections.emptyList();
     }
-    Iterable<FileStatus> deletables = Iterables.filter(files, file -> {
-      // If the file is recent, be conservative and wait for one more scan of backup:system table
+
+    secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
+    prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
+
+    return Iterables.filter(files, file -> {
+      // If the file is recent, be conservative and wait for one more scan of the bulk loads
       if (file.getModificationTime() > secondPrevReadFromBackupTbl) {
+        LOG.debug("Preventing deletion due to timestamp: {}", 
file.getPath().toString());
         return false;
       }
+      // A file can be deleted if it is not registered as a backup bulk load.
       String hfile = file.getPath().getName();
-      boolean foundHFileRef = hfileRefs.contains(hfile);
-      return !foundHFileRef;
+      if (hfileFilenames.contains(hfile)) {
+        LOG.debug("Preventing deletion due to bulk load registration in backup 
system table: {}",
+          file.getPath().toString());
+        return false;
+      } else {
+        LOG.debug("OK to delete: {}", file.getPath().toString());
+        return true;
+      }
     });
-    return deletables;
+  }
+
+  protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException {
+    return tbl.getTablesIncludedInBackups();
   }
 
   @Override
   public boolean isFileDeletable(FileStatus fStat) {
-    // work is done in getDeletableFiles()
-    return true;
+    throw new IllegalStateException("This method should not be called");
   }
 
   @Override
   public void setConf(Configuration config) {
-    this.conf = config;
     this.connection = null;
     try {
-      this.connection = ConnectionFactory.createConnection(conf);
+      this.connection = ConnectionFactory.createConnection(config);
     } catch (IOException ioe) {
       LOG.error("Couldn't establish connection", ioe);
     }
@@ -156,7 +129,7 @@ public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abor
       try {
         this.connection.close();
       } catch (IOException ioe) {
-        LOG.debug("Got " + ioe + " when closing connection");
+        LOG.debug("Got IOException when closing connection", ioe);
       }
     }
     this.stopped = true;
@@ -169,7 +142,7 @@ public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abor
 
   @Override
   public void abort(String why, Throwable e) {
-    LOG.warn("Aborting ReplicationHFileCleaner because " + why, e);
+    LOG.warn("Aborting ReplicationHFileCleaner because {}", why, e);
     this.aborted = true;
     stop(why);
   }
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index c0296d64d9e..61a74450e8d 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -17,12 +17,14 @@
  */
 package org.apache.hadoop.hbase.backup.impl;
 
+import edu.umd.cs.findbugs.annotations.Nullable;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -319,68 +321,6 @@ public final class BackupSystemTable implements Closeable {
     }
   }
 
-  /*
-   * Used during restore
-   * @param backupId the backup Id
-   * @param sTableList List of tables
-   * @return array of Map of family to List of Paths
-   */
-  public Map<byte[], List<Path>>[] readBulkLoadedFiles(String backupId, List<TableName> sTableList)
-    throws IOException {
-    Scan scan = BackupSystemTable.createScanForBulkLoadedFiles(backupId);
-    @SuppressWarnings("unchecked")
-    Map<byte[], List<Path>>[] mapForSrc = new Map[sTableList == null ? 1 : sTableList.size()];
-    try (Table table = connection.getTable(bulkLoadTableName);
-      ResultScanner scanner = table.getScanner(scan)) {
-      Result res = null;
-      while ((res = scanner.next()) != null) {
-        res.advance();
-        TableName tbl = null;
-        byte[] fam = null;
-        String path = null;
-        for (Cell cell : res.listCells()) {
-          if (
-            CellUtil.compareQualifiers(cell, BackupSystemTable.TBL_COL, 0,
-              BackupSystemTable.TBL_COL.length) == 0
-          ) {
-            tbl = TableName.valueOf(CellUtil.cloneValue(cell));
-          } else if (
-            CellUtil.compareQualifiers(cell, BackupSystemTable.FAM_COL, 0,
-              BackupSystemTable.FAM_COL.length) == 0
-          ) {
-            fam = CellUtil.cloneValue(cell);
-          } else if (
-            CellUtil.compareQualifiers(cell, BackupSystemTable.PATH_COL, 0,
-              BackupSystemTable.PATH_COL.length) == 0
-          ) {
-            path = Bytes.toString(CellUtil.cloneValue(cell));
-          }
-        }
-        int srcIdx = IncrementalTableBackupClient.getIndex(tbl, sTableList);
-        if (srcIdx == -1) {
-          // the table is not among the query
-          continue;
-        }
-        if (mapForSrc[srcIdx] == null) {
-          mapForSrc[srcIdx] = new TreeMap<>(Bytes.BYTES_COMPARATOR);
-        }
-        List<Path> files;
-        if (!mapForSrc[srcIdx].containsKey(fam)) {
-          files = new ArrayList<Path>();
-          mapForSrc[srcIdx].put(fam, files);
-        } else {
-          files = mapForSrc[srcIdx].get(fam);
-        }
-        files.add(new Path(path));
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("found bulk loaded file : {} {} {}", tbl, 
Bytes.toString(fam), path);
-        }
-      }
-
-      return mapForSrc;
-    }
-  }
-
   /**
    * Deletes backup status from backup system table table
    * @param backupId backup id
@@ -433,79 +373,63 @@ public final class BackupSystemTable implements Closeable {
   }
 
   /**
-   * Reads the rows from backup table recording bulk loaded hfiles
-   * @param tableList list of table names
+   * Reads all registered bulk loads.
+   */
+  public List<BulkLoad> readBulkloadRows() throws IOException {
+    Scan scan = BackupSystemTable.createScanForOrigBulkLoadedFiles(null);
+    return processBulkLoadRowScan(scan);
+  }
+
+  /**
+   * Reads the registered bulk loads for the given tables.
    */
-  public List<BulkLoad> readBulkloadRows(List<TableName> tableList) throws IOException {
+  public List<BulkLoad> readBulkloadRows(Collection<TableName> tableList) throws IOException {
     List<BulkLoad> result = new ArrayList<>();
     for (TableName table : tableList) {
       Scan scan = BackupSystemTable.createScanForOrigBulkLoadedFiles(table);
-      try (Table bulkLoadTable = connection.getTable(bulkLoadTableName);
-        ResultScanner scanner = bulkLoadTable.getScanner(scan)) {
-        Result res;
-        while ((res = scanner.next()) != null) {
-          res.advance();
-          String fam = null;
-          String path = null;
-          String region = null;
-          byte[] row = null;
-          for (Cell cell : res.listCells()) {
-            row = CellUtil.cloneRow(cell);
-            String rowStr = Bytes.toString(row);
-            region = BackupSystemTable.getRegionNameFromOrigBulkLoadRow(rowStr);
-            if (
-              CellUtil.compareQualifiers(cell, BackupSystemTable.FAM_COL, 0,
-                BackupSystemTable.FAM_COL.length) == 0
-            ) {
-              fam = Bytes.toString(CellUtil.cloneValue(cell));
-            } else if (
-              CellUtil.compareQualifiers(cell, BackupSystemTable.PATH_COL, 0,
-                BackupSystemTable.PATH_COL.length) == 0
-            ) {
-              path = Bytes.toString(CellUtil.cloneValue(cell));
-            }
-          }
-          result.add(new BulkLoad(table, region, fam, path, row));
-          LOG.debug("found orig " + path + " for " + fam + " of table " + 
region);
-        }
-      }
+      result.addAll(processBulkLoadRowScan(scan));
     }
     return result;
   }
 
-  /*
-   * @param sTableList List of tables
-   * @param maps array of Map of family to List of Paths
-   * @param backupId the backup Id
-   */
-  public void writeBulkLoadedFiles(List<TableName> sTableList, Map<byte[], List<Path>>[] maps,
-    String backupId) throws IOException {
-    try (BufferedMutator bufferedMutator = connection.getBufferedMutator(bulkLoadTableName)) {
-      long ts = EnvironmentEdgeManager.currentTime();
-      int cnt = 0;
-      List<Put> puts = new ArrayList<>();
-      for (int idx = 0; idx < maps.length; idx++) {
-        Map<byte[], List<Path>> map = maps[idx];
-        TableName tn = sTableList.get(idx);
-
-        if (map == null) {
-          continue;
-        }
-
-        for (Map.Entry<byte[], List<Path>> entry : map.entrySet()) {
-          byte[] fam = entry.getKey();
-          List<Path> paths = entry.getValue();
-          for (Path p : paths) {
-            Put put = BackupSystemTable.createPutForBulkLoadedFile(tn, fam, p.toString(), backupId,
-              ts, cnt++);
-            puts.add(put);
+  private List<BulkLoad> processBulkLoadRowScan(Scan scan) throws IOException {
+    List<BulkLoad> result = new ArrayList<>();
+    try (Table bulkLoadTable = connection.getTable(bulkLoadTableName);
+      ResultScanner scanner = bulkLoadTable.getScanner(scan)) {
+      Result res;
+      while ((res = scanner.next()) != null) {
+        res.advance();
+        TableName table = null;
+        String fam = null;
+        String path = null;
+        String region = null;
+        byte[] row = null;
+        for (Cell cell : res.listCells()) {
+          row = CellUtil.cloneRow(cell);
+          String rowStr = Bytes.toString(row);
+          region = BackupSystemTable.getRegionNameFromOrigBulkLoadRow(rowStr);
+          if (
+            CellUtil.compareQualifiers(cell, BackupSystemTable.TBL_COL, 0,
+              BackupSystemTable.TBL_COL.length) == 0
+          ) {
+            table = TableName.valueOf(CellUtil.cloneValue(cell));
+          } else if (
+            CellUtil.compareQualifiers(cell, BackupSystemTable.FAM_COL, 0,
+              BackupSystemTable.FAM_COL.length) == 0
+          ) {
+            fam = Bytes.toString(CellUtil.cloneValue(cell));
+          } else if (
+            CellUtil.compareQualifiers(cell, BackupSystemTable.PATH_COL, 0,
+              BackupSystemTable.PATH_COL.length) == 0
+          ) {
+            path = Bytes.toString(CellUtil.cloneValue(cell));
           }
         }
-      }
-      if (!puts.isEmpty()) {
-        bufferedMutator.mutate(puts);
+        result.add(new BulkLoad(table, region, fam, path, row));
+        LOG.debug("Found bulk load entry for table {}, family {}: {}", table, 
fam, path);
       }
     }
+    return result;
   }
 
   /**
@@ -1660,9 +1584,15 @@ public final class BackupSystemTable implements Closeable {
     }
   }
 
-  static Scan createScanForOrigBulkLoadedFiles(TableName table) {
+  /**
+   * Creates a scan to read all registered bulk loads for the given table, or for all tables if
+   * {@code table} is {@code null}.
+   */
+  static Scan createScanForOrigBulkLoadedFiles(@Nullable TableName table) {
     Scan scan = new Scan();
-    byte[] startRow = rowkey(BULK_LOAD_PREFIX, table.toString(), BLK_LD_DELIM);
+    byte[] startRow = table == null
+      ? BULK_LOAD_PREFIX_BYTES
+      : rowkey(BULK_LOAD_PREFIX, table.toString(), BLK_LD_DELIM);
     byte[] stopRow = Arrays.copyOf(startRow, startRow.length);
     stopRow[stopRow.length - 1] = (byte) (stopRow[stopRow.length - 1] + 1);
     scan.withStartRow(startRow);
@@ -1695,6 +1625,7 @@ public final class BackupSystemTable implements Closeable {
    * Used to query bulk loaded hfiles which have been copied by incremental backup
    * @param backupId the backup Id. It can be null when querying for all tables
    * @return the Scan object
+   * @deprecated This method is broken if a backupId is specified - see HBASE-28715
    */
   static Scan createScanForBulkLoadedFiles(String backupId) {
     Scan scan = new Scan();
@@ -1709,15 +1640,6 @@ public final class BackupSystemTable implements Closeable {
     return scan;
   }
 
-  static Put createPutForBulkLoadedFile(TableName tn, byte[] fam, String p, String backupId,
-    long ts, int idx) {
-    Put put = new Put(rowkey(BULK_LOAD_PREFIX, backupId + BLK_LD_DELIM + ts + BLK_LD_DELIM + idx));
-    put.addColumn(BackupSystemTable.META_FAMILY, TBL_COL, tn.getName());
-    put.addColumn(BackupSystemTable.META_FAMILY, FAM_COL, fam);
-    put.addColumn(BackupSystemTable.META_FAMILY, PATH_COL, Bytes.toBytes(p));
-    return put;
-  }
-
   /**
    * Creates Scan operation to load backup set list
    * @return scan operation
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
index b71e084e840..cfceada51a0 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
@@ -17,13 +17,13 @@
  */
 package org.apache.hadoop.hbase.backup;
 
-import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.IdentityHashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -32,11 +32,8 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -47,6 +44,8 @@ import org.junit.experimental.categories.Category;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
+
 @Category({ MasterTests.class, SmallTests.class })
 public class TestBackupHFileCleaner {
 
@@ -56,25 +55,24 @@ public class TestBackupHFileCleaner {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestBackupHFileCleaner.class);
   private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
-  private static Configuration conf = TEST_UTIL.getConfiguration();
-  private static TableName tableName = TableName.valueOf("backup.hfile.cleaner");
-  private static String famName = "fam";
-  static FileSystem fs = null;
-  Path root;
+  private final static Configuration conf = TEST_UTIL.getConfiguration();
+  private final static TableName tableNameWithBackup = TableName.valueOf("backup.hfile.cleaner");
+  private final static TableName tableNameWithoutBackup =
+    TableName.valueOf("backup.hfile.cleaner2");
+
+  private static FileSystem fs = null;
+
+  private Path root;
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
-    TEST_UTIL.startMiniZKCluster();
     TEST_UTIL.startMiniCluster(1);
     fs = FileSystem.get(conf);
   }
 
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
-    if (fs != null) {
-      fs.close();
-    }
     TEST_UTIL.shutdownMiniCluster();
   }
 
@@ -94,55 +92,47 @@ public class TestBackupHFileCleaner {
 
   @Test
   public void testGetDeletableFiles() throws IOException {
-    // 1. Create a file
-    Path file = new Path(root, "testIsFileDeletableWithNoHFileRefs");
-    fs.createNewFile(file);
-    // 2. Assert file is successfully created
-    assertTrue("Test file not created!", fs.exists(file));
-    BackupHFileCleaner cleaner = new BackupHFileCleaner();
-    cleaner.setConf(conf);
-    cleaner.setCheckForFullyBackedUpTables(false);
-    List<FileStatus> stats = new ArrayList<>();
-    // Prime the cleaner
-    cleaner.getDeletableFiles(stats);
-    // 3. Assert that file as is should be deletable
-    FileStatus stat = fs.getFileStatus(file);
-    stats.add(stat);
-    Iterable<FileStatus> deletable = cleaner.getDeletableFiles(stats);
-    boolean found = false;
-    for (FileStatus stat1 : deletable) {
-      if (stat.equals(stat1)) {
-        found = true;
-      }
-    }
-    assertTrue(
-      "Cleaner should allow to delete this file as there is no hfile reference 
" + "for it.",
-      found);
-
-    // 4. Add the file as bulk load
-    List<Path> list = new ArrayList<>(1);
-    list.add(file);
-    try (Connection conn = ConnectionFactory.createConnection(conf);
-      BackupSystemTable sysTbl = new BackupSystemTable(conn)) {
-      List<TableName> sTableList = new ArrayList<>();
-      sTableList.add(tableName);
-      @SuppressWarnings("unchecked")
-      IdentityHashMap<byte[], List<Path>>[] maps = new IdentityHashMap[1];
-      maps[0] = new IdentityHashMap<>();
-      maps[0].put(Bytes.toBytes(famName), list);
-      sysTbl.writeBulkLoadedFiles(sTableList, maps, "1");
-    }
+    FileStatus file1 = createFile("file1");
+    FileStatus file1Archived = createFile("archived/file1");
+    FileStatus file2 = createFile("file2");
+    FileStatus file3 = createFile("file3");
 
-    // 5. Assert file should not be deletable
-    deletable = cleaner.getDeletableFiles(stats);
-    found = false;
-    for (FileStatus stat1 : deletable) {
-      if (stat.equals(stat1)) {
-        found = true;
+    BackupHFileCleaner cleaner = new BackupHFileCleaner() {
+      @Override
+      protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) {
+        return Set.of(tableNameWithBackup);
       }
+    };
+    cleaner.setConf(conf);
+
+    Iterable<FileStatus> deletable;
+
+    // The first call will not allow any deletions because of the timestamp mechanism.
+    deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3));
+    assertEquals(Set.of(), Sets.newHashSet(deletable));
+
+    // No bulk loads registered, so all files can be deleted.
+    deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3));
+    assertEquals(Set.of(file1, file1Archived, file2, file3), Sets.newHashSet(deletable));
+
+    // Register some bulk loads.
+    try (BackupSystemTable backupSystem = new BackupSystemTable(TEST_UTIL.getConnection())) {
+      byte[] unused = new byte[] { 0 };
+      backupSystem.registerBulkLoad(tableNameWithBackup, unused,
+        Map.of(unused, List.of(file1.getPath())));
+      backupSystem.registerBulkLoad(tableNameWithoutBackup, unused,
+        Map.of(unused, List.of(file2.getPath())));
     }
-    assertFalse(
-      "Cleaner should not allow to delete this file as there is a hfile 
reference " + "for it.",
-      found);
+
+    // File 1 can no longer be deleted, because it is registered as a bulk load.
+    deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3));
+    assertEquals(Set.of(file2, file3), Sets.newHashSet(deletable));
+  }
+
+  private FileStatus createFile(String fileName) throws IOException {
+    Path file = new Path(root, fileName);
+    fs.createNewFile(file);
+    assertTrue("Test file not created!", fs.exists(file));
+    return fs.getFileStatus(file);
   }
 }
