This is an automated email from the ASF dual-hosted git repository. rmattingly pushed a commit to branch HBASE-29134-branch-3 in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 1d6cf234687769446c079d97ee8df58926073e28 Author: Hernan Romer <[email protected]> AuthorDate: Mon Apr 7 15:26:20 2025 -0400 HBASE-29134 Optimize bulkload backup process for incremental backups (#6706) Signed-off-by: Ray Mattingly <[email protected]> --- .../backup/impl/IncrementalTableBackupClient.java | 16 +++++-- .../hbase/backup/impl/MergeSplitBulkloadInfo.java | 55 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java index 52f824c5dda..7d8d2bb5ef6 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java @@ -122,8 +122,7 @@ public class IncrementalTableBackupClient extends TableBackupClient { * @param tablesToBackup list of tables to be backed up */ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws IOException { - List<String> activeFiles = new ArrayList<>(); - List<String> archiveFiles = new ArrayList<>(); + Map<TableName, MergeSplitBulkloadInfo> toBulkload = new HashMap<>(); List<BulkLoad> bulkLoads = backupManager.readBulkloadRows(tablesToBackup); FileSystem tgtFs; try { @@ -136,6 +135,8 @@ public class IncrementalTableBackupClient extends TableBackupClient { for (BulkLoad bulkLoad : bulkLoads) { TableName srcTable = bulkLoad.getTableName(); + MergeSplitBulkloadInfo bulkloadInfo = + toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new); String regionName = bulkLoad.getRegion(); String fam = bulkLoad.getColumnFamily(); String filename = FilenameUtils.getName(bulkLoad.getHfilePath()); @@ -165,13 +166,18 @@ public class IncrementalTableBackupClient extends TableBackupClient { srcTableQualifier); LOG.trace("copying {} to {}", p, tgt); } - 
activeFiles.add(p.toString()); + bulkloadInfo.addActiveFile(p.toString()); } else if (fs.exists(archive)) { LOG.debug("copying archive {} to {}", archive, tgt); - archiveFiles.add(archive.toString()); + bulkloadInfo.addArchiveFiles(archive.toString()); } - mergeSplitAndCopyBulkloadedHFiles(activeFiles, archiveFiles, srcTable, tgtFs); } + + for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) { + mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(), + bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs); + } + return bulkLoads; } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java new file mode 100644 index 00000000000..95243aebc46 --- /dev/null +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.backup.impl; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.TableName; +import org.apache.yetus.audience.InterfaceAudience; + [email protected] +class MergeSplitBulkloadInfo { + private final List<String> activeFiles = new ArrayList<>(); + private final List<String> archiveFiles = new ArrayList<>(); + + private final TableName srcTable; + + public MergeSplitBulkloadInfo(TableName srcTable) { + this.srcTable = srcTable; + } + + public TableName getSrcTable() { + return srcTable; + } + + public List<String> getArchiveFiles() { + return archiveFiles; + } + + public List<String> getActiveFiles() { + return activeFiles; + } + + public void addActiveFile(String file) { + activeFiles.add(file); + } + + public void addArchiveFiles(String file) { + archiveFiles.add(file); + } +}
