This is an automated email from the ASF dual-hosted git repository.
rmattingly pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.6 by this push:
new 1bd85c0a3dc HBASE-29134 Optimize bulkload backup process for
incremental backups (#6706) (#6890)
1bd85c0a3dc is described below
commit 1bd85c0a3dc1424a679e9c60905d06a22cd6c9fa
Author: Ray Mattingly <[email protected]>
AuthorDate: Tue Apr 8 09:29:44 2025 -0400
HBASE-29134 Optimize bulkload backup process for incremental backups
(#6706) (#6890)
Signed-off-by: Ray Mattingly <[email protected]>
Co-authored-by: Hernan Romer <[email protected]>
---
.../backup/impl/IncrementalTableBackupClient.java | 16 +++++--
.../hbase/backup/impl/MergeSplitBulkloadInfo.java | 55 ++++++++++++++++++++++
2 files changed, 66 insertions(+), 5 deletions(-)
diff --git
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index 885f942c77a..05878867140 100644
---
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -123,8 +123,7 @@ public class IncrementalTableBackupClient extends
TableBackupClient {
* @param tablesToBackup list of tables to be backed up
*/
protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup)
throws IOException {
- List<String> activeFiles = new ArrayList<>();
- List<String> archiveFiles = new ArrayList<>();
+ Map<TableName, MergeSplitBulkloadInfo> toBulkload = new HashMap<>();
List<BulkLoad> bulkLoads = backupManager.readBulkloadRows(tablesToBackup);
FileSystem tgtFs;
try {
@@ -137,6 +136,8 @@ public class IncrementalTableBackupClient extends
TableBackupClient {
for (BulkLoad bulkLoad : bulkLoads) {
TableName srcTable = bulkLoad.getTableName();
+ MergeSplitBulkloadInfo bulkloadInfo =
+ toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new);
String regionName = bulkLoad.getRegion();
String fam = bulkLoad.getColumnFamily();
String filename = FilenameUtils.getName(bulkLoad.getHfilePath());
@@ -166,13 +167,18 @@ public class IncrementalTableBackupClient extends
TableBackupClient {
srcTableQualifier);
LOG.trace("copying {} to {}", p, tgt);
}
- activeFiles.add(p.toString());
+ bulkloadInfo.addActiveFile(p.toString());
} else if (fs.exists(archive)) {
LOG.debug("copying archive {} to {}", archive, tgt);
- archiveFiles.add(archive.toString());
+ bulkloadInfo.addArchiveFiles(archive.toString());
}
- mergeSplitAndCopyBulkloadedHFiles(activeFiles, archiveFiles, srcTable,
tgtFs);
}
+
+ for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) {
+ mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(),
+ bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs);
+ }
+
return bulkLoads;
}
diff --git
a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java
new file mode 100644
index 00000000000..95243aebc46
--- /dev/null
+++
b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/MergeSplitBulkloadInfo.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+class MergeSplitBulkloadInfo {
+ private final List<String> activeFiles = new ArrayList<>();
+ private final List<String> archiveFiles = new ArrayList<>();
+
+ private final TableName srcTable;
+
+ public MergeSplitBulkloadInfo(TableName srcTable) {
+ this.srcTable = srcTable;
+ }
+
+ public TableName getSrcTable() {
+ return srcTable;
+ }
+
+ public List<String> getArchiveFiles() {
+ return archiveFiles;
+ }
+
+ public List<String> getActiveFiles() {
+ return activeFiles;
+ }
+
+ public void addActiveFile(String file) {
+ activeFiles.add(file);
+ }
+
+ public void addArchiveFiles(String file) {
+ archiveFiles.add(file);
+ }
+}