[ https://issues.apache.org/jira/browse/HADOOP-17052?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17116998#comment-17116998 ]

Dhiraj Hegde commented on HADOOP-17052:
---------------------------------------

Here is one for the read path (the trace goes through DFSInputStream):
{code:java}
java.nio.channels.UnresolvedAddressException
        at sun.nio.ch.Net.checkAddress(Net.java:101)
        at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:622)
        at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
        at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3483)
        at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:796)
        at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:694)
        at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:355)
        at org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1162)
        at org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1087)
        at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1444)
        at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1407)
        at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:89)
        at org.apache.hadoop.hbase.io.hfile.HFileBlock.positionalReadWithExtra(HFileBlock.java:834)
        at org.apache.hadoop.hbase.io.hfile.HFileBlock$AbstractFSReader.readAtOffset(HFileBlock.java:1530)
        at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1797)
        at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1624)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2.readBlock(HFileReaderV2.java:455)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.readNextDataBlock(HFileReaderV2.java:733)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$ScannerV2.isNextBlock(HFileReaderV2.java:858)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$ScannerV2.positionForNextBlock(HFileReaderV2.java:853)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$ScannerV2._next(HFileReaderV2.java:870)
        at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$ScannerV2.next(HFileReaderV2.java:890)
        at org.apache.hadoop.hbase.regionserver.StoreFileScanner.next(StoreFileScanner.java:169)
        at org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:113)
        at org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:667)
        at org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:152)
        at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.populateResult(HRegion.java:6175)
        at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextInternal(HRegion.java:6335)
        at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextRaw(HRegion.java:6113)
        at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.next(HRegion.java:6090)
        at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.next(HRegion.java:6076)
        at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:7258)
        at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:7216)
        at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:7209)
        at org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:2273)
        at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:35068)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2417)
        at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:124)
        at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:186)
        at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:166)
{code}
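
Note that java.nio.channels.UnresolvedAddressException extends IllegalArgumentException rather than IOException, which is why it escapes the retry handling and propagates all the way up as shown above. A quick illustrative check (not from the issue itself):
{code:java}
import java.io.IOException;
import java.nio.channels.UnresolvedAddressException;

public class HierarchyCheck {
    public static void main(String[] args) {
        Throwable t = new UnresolvedAddressException();
        // false: not an IOException, so catch (IOException e) blocks in
        // DFSInputStream/DFSOutputStream never see it
        System.out.println(t instanceof IOException);
        // true: it is an unchecked IllegalArgumentException
        System.out.println(t instanceof IllegalArgumentException);
    }
}
{code}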

> NetUtils.connect() throws an exception that prevents any retries when hostname 
> resolution fails
> ----------------------------------------------------------------------------------------------
>
>                 Key: HADOOP-17052
>                 URL: https://issues.apache.org/jira/browse/HADOOP-17052
>             Project: Hadoop Common
>          Issue Type: Bug
>          Components: hdfs-client
>    Affects Versions: 2.10.0, 2.9.2, 3.2.1, 3.1.3
>            Reporter: Dhiraj Hegde
>            Assignee: Dhiraj Hegde
>            Priority: Major
>
> Hadoop components are increasingly being deployed on VMs and containers. One 
> aspect of this environment is that DNS is dynamic. Hostname records get 
> modified (or deleted and recreated) as a container in Kubernetes (or even a 
> VM) is created or recreated. In such dynamic environments, an initial DNS 
> resolution request might briefly fail because the DNS client doesn't always 
> have the latest records. This has been observed in Kubernetes in particular. 
> In such cases NetUtils.connect() appears to throw 
> java.nio.channels.UnresolvedAddressException. Much of the Hadoop code (such 
> as DFSInputStream and DFSOutputStream) is designed to retry on IOException. 
> However, since UnresolvedAddressException is not a child of IOException, no 
> retry happens and the code aborts immediately. It would be much better if 
> NetUtils.connect() threw java.net.UnknownHostException instead, as that is 
> derived from IOException and the code would treat it as a retryable error.
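
As a minimal sketch of the proposed behavior (illustrative only, not the actual HADOOP-17052 patch; the ConnectSketch class and its simplified connect() signature are hypothetical), the connect path could translate the unchecked exception into a retryable one:
{code:java}
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.channels.SocketChannel;
import java.nio.channels.UnresolvedAddressException;

// Hypothetical sketch, not the actual patch: translate the unchecked
// UnresolvedAddressException into UnknownHostException, an IOException
// subclass that the existing DFS client retry logic already handles.
public class ConnectSketch {
    public static void connect(SocketChannel channel, InetSocketAddress endpoint)
            throws IOException {
        try {
            channel.connect(endpoint);
        } catch (UnresolvedAddressException e) {
            // Preserve the original exception as the cause for diagnostics.
            UnknownHostException uhe = new UnknownHostException(
                "Cannot resolve host: " + endpoint.getHostString());
            uhe.initCause(e);
            throw uhe;
        }
    }
}
{code}
With a translation along these lines, a transient DNS failure surfaces as an IOException that DFSInputStream and DFSOutputStream can retry, instead of an unchecked exception that aborts the read or write immediately.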


