[ 
https://issues.apache.org/jira/browse/HBASE-9746?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lars Hofhansl updated HBASE-9746:
---------------------------------

    Attachment: 9746-trunk.txt

Here's a trunk patch. (It incorporates Andy's suggestion.)

If we find this generally too risky, we could have a specific ZooKeeper wrapper 
that does this for replication only (it would be a simple subclass of 
RecoverableZooKeeper with just one or two methods overridden).

(Although in trunk this is slightly more involved, as we use the standard 
HConnection to connect to the replication slaves.)

> RegionServer can't start when replication tries to replicate to an unknown 
> host
> -------------------------------------------------------------------------------
>
>                 Key: HBASE-9746
>                 URL: https://issues.apache.org/jira/browse/HBASE-9746
>             Project: HBase
>          Issue Type: Bug
>    Affects Versions: 0.94.12
>            Reporter: Lars Hofhansl
>            Priority: Minor
>             Fix For: 0.99.0, 2.0.0, 0.98.7, 0.94.24
>
>         Attachments: 9746-0.98.txt, 9746-trunk.txt
>
>
> Just ran into this:
> {code}
> 13/10/11 00:37:02 [regionserver60020] WARN  zookeeper.ZKConfig(204): 
> java.net.UnknownHostException: <old-host>: Name or service not known
>       at java.net.Inet6AddressImpl.lookupAllHostAddr(Native Method)
>       at java.net.InetAddress$1.lookupAllHostAddr(InetAddress.java:894)
>       at 
> java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1286)
>       at java.net.InetAddress.getAllByName0(InetAddress.java:1239)
>       at java.net.InetAddress.getAllByName(InetAddress.java:1155)
>       at java.net.InetAddress.getAllByName(InetAddress.java:1091)
>       at java.net.InetAddress.getByName(InetAddress.java:1041)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZKConfig.getZKQuorumServersString(ZKConfig.java:201)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZKConfig.getZKQuorumServersString(ZKConfig.java:245)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:147)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
>       at 
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
>       at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] ERROR zookeeper.ZKConfig(210): no valid 
> quorum servers found in zoo.cfg
> 13/10/11 00:37:02 [regionserver60020] WARN  regionserver.HRegionServer(1108): 
> Exception in region server : 
> java.io.IOException: Unable to determine ZooKeeper ensemble
>       at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
>       at 
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
>       at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] INFO  regionserver.HRegionServer(1823): 
> STOPPED: Failed initialization
> 13/10/11 00:37:02 [regionserver60020] ERROR regionserver.HRegionServer(1228): 
> Failed init
> java.io.IOException: Unable to determine ZooKeeper ensemble
>       at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
>       at 
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
>       at java.lang.Thread.run(Thread.java:722)
> 13/10/11 00:37:02 [regionserver60020] FATAL regionserver.HRegionServer(1898): 
> ABORTING region server XXXXXXXX,60020,1381451821216: Unhandled exception: 
> Unable to determine ZooKeeper ensemble
> java.io.IOException: Unable to determine ZooKeeper ensemble
>       at org.apache.hadoop.hbase.zookeeper.ZKUtil.connect(ZKUtil.java:116)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:153)
>       at 
> org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.<init>(ZooKeeperWatcher.java:127)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.reloadZkWatcher(ReplicationPeer.java:170)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationPeer.<init>(ReplicationPeer.java:69)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.getPeer(ReplicationZookeeper.java:343)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectToPeer(ReplicationZookeeper.java:308)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.connectExistingPeers(ReplicationZookeeper.java:189)
>       at 
> org.apache.hadoop.hbase.replication.ReplicationZookeeper.<init>(ReplicationZookeeper.java:156)
>       at 
> org.apache.hadoop.hbase.replication.regionserver.Replication.initialize(Replication.java:89)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.newReplicationInstance(HRegionServer.java:3986)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.createNewReplicationInstance(HRegionServer.java:3955)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.setupWALAndReplication(HRegionServer.java:1412)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.handleReportForDutyResponse(HRegionServer.java:1096)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:749)
>       at java.lang.Thread.run(Thread.java:722)
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to