[
https://issues.apache.org/jira/browse/HBASE-9746?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Lars Hofhansl updated HBASE-9746:
---------------------------------
Attachment: 9746-trunk.txt
Here's a trunk patch. (Addresses Andy's suggestion.)
If we find this generally too risky, we could have a specific ZooKeeper wrapper
that does this for replication only (it would be a simple subclass of
RecoverableZooKeeper with just one or two methods overridden).
(In trunk, though, this is slightly more involved, as we use the standard
HConnection to connect to the replication slaves.)
> RegionServer can't start when replication tries to replicate to an unknown
> host
> -------------------------------------------------------------------------------
>
> Key: HBASE-9746
> URL: https://issues.apache.org/jira/browse/HBASE-9746
> Project: HBase
> Issue Type: Bug
> Affects Versions: 0.94.12
> Reporter: Lars Hofhansl
> Priority: Minor
> Fix For: 0.99.0, 2.0.0, 0.98.7, 0.94.24
>
> Attachments: 9746-0.98.txt, 9746-trunk.txt
>
>
> Just ran into this:
> {code}
> 13/10/11 00:37:02 [regionserver60020] WARN zookeeper.ZKConfig(204):
> java.net.UnknownHostException: <old-host>: Name or service not known
> at java.net.Inet6AddressImpl.lookupAllHostAddr(Native Method)
> at java.net.InetAddress$1.lookupAllHostAddr(InetAddress.java:894)
> at
> java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1286)
> at java.net.InetAddress.getAllByName0(InetAddress.java:1239)
> at java.net.InetAddress.getAllByName(InetAddress.java:1155)
> at java.net.InetAddress.getAllByName(InetAddress.java:1091)
> at java.net.InetAddress.getByName(InetAddress.java:1041)
> at
> org.apache.hadoop.hbase.zookeeper.ZKConfig.getZKQuorumServersString(ZKConfig.java:201)
> at
> org.apache.hadoop.hbase.zookeeper.ZKConfig.getZKQuorumServersString(ZKConfig.java:245)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:147)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
> at
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
> at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] ERROR zookeeper.ZKConfig(210): no valid
> quorum servers found in zoo.cfg
> 13/10/11 00:37:02 [regionserver60020] WARN regionserver.HRegionServer(1108):
> Exception in region server :
> java.io.IOException: Unable to determine ZooKeeper ensemble
> at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
> at
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
> at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] INFO regionserver.HRegionServer(1823):
> STOPPED: Failed initialization
> 13/10/11 00:37:02 [regionserver60020] ERROR regionserver.HRegionServer(1228):
> Failed init
> java.io.IOException: Unable to determine ZooKeeper ensemble
> at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
> at
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
> at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] FATAL regionserver.HRegionServer(1898):
> ABORTING region server XXXXXXXX,60020,1381451821216: Unhandled exception:
> Unable to determine ZooKeeper ensemble
> java.io.IOException: Unable to determine ZooKeeper ensemble
> at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
> at
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
> at
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
> at
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
> at
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
> at java.lang.Thread.run(Thread.java:722)
> {code}
--
This message was sent by Atlassian JIRA
(v6.2#6252)