[ https://issues.apache.org/jira/browse/HBASE-28082?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17771019#comment-17771019 ]
Jan Van Besien commented on HBASE-28082: ---------------------------------------- I have a patch that works by making fewer assumptions about the actual file name other than that it starts with a ServerName (host,port,...). Can't seem to attach patch files though. {code:java} From 40d88d9253c78e04823af49f199684bd8ac03966 Mon Sep 17 00:00:00 2001 From: Jan Van Besien <ja...@ngdata.com> Date: Mon, 2 Oct 2023 11:07:59 +0200 Subject: [PATCH] HBASE-28082 more lenient WAL hostname parsing Make the hostname parsing in BackupUtils#parseHostFromOldLog more lenient by not making any assumptions about the name of the file other than that it starts with a org.apache.hadoop.hbase.ServerName. --- .../hadoop/hbase/backup/util/BackupUtils.java | 10 +++++---- .../hadoop/hbase/backup/TestBackupUtils.java | 22 +++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java index 5be8eed3952..a920b55bca9 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java @@ -30,6 +30,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.TreeMap; import java.util.TreeSet; +import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -365,10 +366,11 @@ public final class BackupUtils { return null; } try { - String n = p.getName(); - int idx = n.lastIndexOf(LOGNAME_SEPARATOR); - String s = URLDecoder.decode(n.substring(0, idx), "UTF8"); - return ServerName.valueOf(s).getAddress().toString(); + String urlDecodedName = URLDecoder.decode(p.getName(), "UTF8"); + Iterable<String> nameSplitsOnComma = Splitter.on(",").split(urlDecodedName); + String host = 
Iterables.get(nameSplitsOnComma, 0); + String port = Iterables.get(nameSplitsOnComma, 1); + return host + ":" + port; } catch (Exception e) { LOG.warn("Skip log file (can't parse): {}", p); return null; diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupUtils.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupUtils.java index 831ec309cfc..6aebe8db082 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupUtils.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupUtils.java @@ -87,21 +87,25 @@ public class TestBackupUtils { @Test public void testFilesystemWalHostNameParsing() throws IOException { - String host = "localhost"; + String host = "a-region-server.domain.com"; int port = 60030; ServerName serverName = ServerName.valueOf(host, port, 1234); Path walRootDir = CommonFSUtils.getWALRootDir(conf); Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME); - Path testWalPath = new Path(oldLogDir, - serverName.toString() + BackupUtils.LOGNAME_SEPARATOR + EnvironmentEdgeManager.currentTime()); - Path testMasterWalPath = - new Path(oldLogDir, testWalPath.getName() + MasterRegionFactory.ARCHIVED_WAL_SUFFIX); + Path testOldWalPath = new Path(oldLogDir, + serverName + BackupUtils.LOGNAME_SEPARATOR + EnvironmentEdgeManager.currentTime()); + Assert.assertEquals(host + Addressing.HOSTNAME_PORT_SEPARATOR + port, BackupUtils.parseHostFromOldLog(testOldWalPath)); - String parsedHost = BackupUtils.parseHostFromOldLog(testMasterWalPath); - Assert.assertNull(parsedHost); + Path testMasterWalPath = + new Path(oldLogDir, testOldWalPath.getName() + MasterRegionFactory.ARCHIVED_WAL_SUFFIX); + Assert.assertNull(BackupUtils.parseHostFromOldLog(testMasterWalPath)); - parsedHost = BackupUtils.parseHostFromOldLog(testWalPath); - Assert.assertEquals(parsedHost, host + Addressing.HOSTNAME_PORT_SEPARATOR + port); + // org.apache.hadoop.hbase.wal.BoundedGroupingStrategy 
does this + Path testOldWalWithRegionGroupingPath = new Path(oldLogDir, + serverName + BackupUtils.LOGNAME_SEPARATOR + serverName + + BackupUtils.LOGNAME_SEPARATOR + "regiongroup-0" + BackupUtils.LOGNAME_SEPARATOR + + EnvironmentEdgeManager.currentTime()); + Assert.assertEquals(host + Addressing.HOSTNAME_PORT_SEPARATOR + port, BackupUtils.parseHostFromOldLog(testOldWalWithRegionGroupingPath)); } } -- 2.41.0 {code} > oldWALs naming can be incompatible with HBase backup > ---------------------------------------------------- > > Key: HBASE-28082 > URL: https://issues.apache.org/jira/browse/HBASE-28082 > Project: HBase > Issue Type: Bug > Environment: Encountered on HBase > a2e7d2015e9f603e46339d0582e29a86843b9324 (branch-2), running in Kubernetes. > Reporter: Dieter De Paepe > Priority: Major > > I am testing HBase backup functionality, and noticed following warning when > running "hbase backup create incremental ...": > > {noformat} > 23/09/13 15:44:10 WARN org.apache.hadoop.hbase.backup.util.BackupUtils: Skip > log file (can't parse): > hdfs://hdfsns/hbase/hbase/oldWALs/hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.regiongroup-0.1694609969312{noformat} > It appears in my setup, the oldWALs are indeed given names that seem to break > "ServerName.valueOf(s)" in "BackupUtils#parseHostFromOldLog(Path p)": > > > {noformat} > user@hadoop-client-769bc9946-xqrt2:/$ hdfs dfs -ls hdfs:///hbase/hbase/oldWALs > Found 42 items > -rw-r--r-- 1 hbase hbase 775421 2023-09-13 13:14 > hdfs:///hbase/hbase/oldWALs/hbase-master-0.minikube-shared%2C16000%2C1694609954719.hbase-master-0.minikube-shared%2C16000%2C1694609954719.regiongroup-0.1694609957984$masterlocalwal$ > -rw-r--r-- 1 hbase hbase 26059 2023-09-13 13:29 > 
hdfs:///hbase/hbase/oldWALs/hbase-master-0.minikube-shared%2C16000%2C1694609954719.hbase-master-0.minikube-shared%2C16000%2C1694609954719.regiongroup-0.1694610867894$masterlocalwal$ > ... > -rw-r--r-- 1 hbase hbase 242479 2023-09-13 14:16 > hdfs:///hbase/hbase/oldWALs/hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.regiongroup-0.1694609969312 > -rw-r--r-- 1 hbase hbase 4364 2023-09-13 14:16 > hdfs:///hbase/hbase/oldWALs/hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.regiongroup-0.1694610188654 > ... > -rw-r--r-- 1 hbase hbase 70802 2023-09-13 13:15 > hdfs:///hbase/hbase/oldWALs/hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.meta.1694609970025.meta > -rw-r--r-- 1 hbase hbase 93 2023-09-13 13:04 > hdfs:///hbase/hbase/oldWALs/hbase-region-0.hbase-region.minikube-shared.svc.cluster.local%2C16020%2C1694609964681.meta.1694610188627.meta > ...{noformat} > I'd say this is not a bug in the backup system, but rather in whatever gives > the oldWAL files its name. I'm however not that familiar with HBase code to > find where these files are created. Any pointers are appreciated. > Given that this causes some logs to be missed during backup, I guess this can > lead to data loss in a backup restore? > -- This message was sent by Atlassian Jira (v8.20.10#820010)