HBASE-19478 Utilize multi-get to speed up WAL file checking in BackupLogCleaner (Toshihiro Suzuki)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/cafd4e4a Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/cafd4e4a Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/cafd4e4a Branch: refs/heads/HBASE-19397 Commit: cafd4e4ad76f45be912edc9d5021f872de94fd5c Parents: 6c2aa4c Author: tedyu <[email protected]> Authored: Mon Jan 1 06:55:11 2018 -0800 Committer: tedyu <[email protected]> Committed: Mon Jan 1 06:55:11 2018 -0800 ---------------------------------------------------------------------- .../hbase/backup/impl/BackupSystemTable.java | 42 ++++++++++++++++++++ .../hbase/backup/master/BackupLogCleaner.java | 16 +++++--- .../hbase/backup/TestBackupSystemTable.java | 22 ++++++++++ 3 files changed, 74 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/cafd4e4a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java ---------------------------------------------------------------------- diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java index 16b2497..6b721d4 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java @@ -37,6 +37,7 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; @@ -1091,6 +1092,47 @@ public final class BackupSystemTable implements Closeable { } /** + * Check if WAL file is eligible for deletion using multi-get + * @param files names of a file to check + * @return map of results + * (key: FileStatus object. value: true if the file is deletable, false otherwise) + * @throws IOException exception + */ + public Map<FileStatus, Boolean> areWALFilesDeletable(Iterable<FileStatus> files) + throws IOException { + final int BUF_SIZE = 100; + + Map<FileStatus, Boolean> ret = new HashMap<>(); + try (Table table = connection.getTable(tableName)) { + List<Get> getBuffer = new ArrayList<>(); + List<FileStatus> fileStatuses = new ArrayList<>(); + + for (FileStatus file : files) { + String wal = file.getPath().toString(); + Get get = createGetForCheckWALFile(wal); + getBuffer.add(get); + fileStatuses.add(file); + if (getBuffer.size() >= BUF_SIZE) { + Result[] results = table.get(getBuffer); + for (int i = 0; i < results.length; i++) { + ret.put(fileStatuses.get(i), !results[i].isEmpty()); + } + getBuffer.clear(); + fileStatuses.clear(); + } + } + + if (!getBuffer.isEmpty()) { + Result[] results = table.get(getBuffer); + for (int i = 0; i < results.length; i++) { + ret.put(fileStatuses.get(i), !results[i].isEmpty()); + } + } + } + return ret; + } + + /** * Checks if we have at least one backup session in backup system table This API is used by * BackupLogCleaner * @return true, if - at least one session exists in backup system table table http://git-wip-us.apache.org/repos/asf/hbase/blob/cafd4e4a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java ---------------------------------------------------------------------- diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java index 09c4861..a8ece39 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java @@ -96,14 +96,18 @@ public class BackupLogCleaner extends BaseLogCleanerDelegate { return files; } - for (FileStatus file : files) { + Map<FileStatus, Boolean> walFilesDeletableMap = table.areWALFilesDeletable(files); + for (Map.Entry<FileStatus, Boolean> entry: walFilesDeletableMap.entrySet()) { + FileStatus file = entry.getKey(); String wal = file.getPath().toString(); - boolean logInSystemTable = table.isWALFileDeletable(wal); - if (LOG.isDebugEnabled()) { - if (logInSystemTable) { + boolean deletable = entry.getValue(); + if (deletable) { + if (LOG.isDebugEnabled()) { LOG.debug("Found log file in backup system table, deleting: " + wal); - list.add(file); - } else { + } + list.add(file); + } else { + if (LOG.isDebugEnabled()) { LOG.debug("Didn't find this log in backup system table, keeping: " + wal); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/cafd4e4a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java ---------------------------------------------------------------------- diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java index ee5f9b9..f5ee268 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupSystemTable.java @@ -32,10 +32,13 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.TableName; @@ -335,6 +338,25 @@ public class TestBackupSystemTable { assertTrue(table.isWALFileDeletable(files.get(2))); assertFalse(table.isWALFileDeletable(newFile)); + // test for isWALFilesDeletable + List<FileStatus> fileStatues = new ArrayList<>(); + for (String file : files) { + FileStatus fileStatus = new FileStatus(); + fileStatus.setPath(new Path(file)); + fileStatues.add(fileStatus); + } + + FileStatus newFileStatus = new FileStatus(); + newFileStatus.setPath(new Path(newFile)); + fileStatues.add(newFileStatus); + + Map<FileStatus, Boolean> walFilesDeletableMap = table.areWALFilesDeletable(fileStatues); + + assertTrue(walFilesDeletableMap.get(fileStatues.get(0))); + assertTrue(walFilesDeletableMap.get(fileStatues.get(1))); + assertTrue(walFilesDeletableMap.get(fileStatues.get(2))); + assertFalse(walFilesDeletableMap.get(newFileStatus)); + cleanBackupTable(); }
