[ https://issues.apache.org/jira/browse/HDFS-15379?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17119650#comment-17119650 ]
ludun commented on HDFS-15379: ------------------------------ in SocketIOWithTimeout$SelectorPool.select we throw an exception {quote}if (Thread.currentThread().isInterrupted()) { throw new InterruptedIOException("Interrupted while waiting for " + "IO on channel " + channel + ". " + timeout + " millis timeout left."); } {quote} and Thread.currentThread().isInterrupted() does not clear the interrupted state of the current thread {quote}public static boolean interrupted() { return currentThread().isInterrupted(true); } {quote} If we then do something in the same thread, like an RPC to the NameNode, then when connecting to the NameNode again, AbstractInterruptibleChannel.begin will check the thread state {quote} protected final void begin() { if (interruptor == null) { interruptor = new Interruptible() { public void interrupt(Thread target) { synchronized (closeLock) { if (!open) return; open = false; interrupted = target; try { AbstractInterruptibleChannel.this.implCloseChannel(); } catch (IOException x) { } } }}; } blockedOn(interruptor); Thread me = Thread.currentThread(); if (me.isInterrupted()) interruptor.interrupt(me); }{quote} and an exception is thrown in AbstractInterruptibleChannel.end {quote} protected final void end(boolean completed) throws AsynchronousCloseException { blockedOn(null); Thread interrupted = this.interrupted; if (interrupted != null && interrupted == Thread.currentThread()) { interrupted = null; throw new ClosedByInterruptException(); } if (!completed && !open) throw new AsynchronousCloseException(); }{quote} So we need to reset the interrupted state somewhere, e.g. via Thread.currentThread().interrupted(), if we want to use the same thread to connect to the NameNode.
> DataStreamer should reset thread interrupted state in createBlockOutputStream > ----------------------------------------------------------------------------- > > Key: HDFS-15379 > URL: https://issues.apache.org/jira/browse/HDFS-15379 > Project: Hadoop HDFS > Issue Type: Bug > Components: dfsclient > Affects Versions: 2.7.7, 3.1.3 > Reporter: ludun > Priority: Major > > In createBlockOutputStream, the thread was interrupted because of a timeout > connecting to the DataNode. > {quote}2020-05-27 18:32:53,310 | DEBUG | Connecting to datanode > xx.xx.xx.xx:25009 | DataStreamer.java:251 > 2020-05-27 18:33:50,457 | INFO | Exception in createBlockOutputStream > blk_1115121199_41386360 | DataStreamer.java:1854 > java.io.InterruptedIOException: Interrupted while waiting for IO on channel > java.nio.channels.SocketChannel[connected local=/xx.xx.xx.xx:40370 > remote=/xx.xx.xx.xx:25009]. 615000 millis timeout left. > at > org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:342) > at > org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:157) > at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:161) > at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:131) > at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:118) > at java.io.FilterInputStream.read(FilterInputStream.java:83) > at java.io.FilterInputStream.read(FilterInputStream.java:83) > at > org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:551) > at > org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1826) > at > org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1743) > at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:718) > {quote} > Then the subsequent abandonBlock RPC to the NameNode also failed immediately > due to the interrupted exception. 
> {quote}2020-05-27 18:33:50,461 | DEBUG | Connecting to xx/xx.xx.xx.xx:25000 | > Client.java:814 > 2020-05-27 18:33:50,462 | DEBUG | Failed to connect to server: > xx/xx.xx.xx.xx:25000: try once and fail. | Client.java:956 > java.nio.channels.ClosedByInterruptException > at > java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) > at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:659) > at > org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192) > at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531) > at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:720) > at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:823) > at org.apache.hadoop.ipc.Client$Connection.access$3700(Client.java:436) > at org.apache.hadoop.ipc.Client.getConnection(Client.java:1613) > at org.apache.hadoop.ipc.Client.call(Client.java:1444) > at org.apache.hadoop.ipc.Client.call(Client.java:1397) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:234) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118) > at com.sun.proxy.$Proxy10.abandonBlock(Unknown Source) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.abandonBlock(ClientNamenodeProtocolTranslatorPB.java:509) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422) > at > org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165) > at > org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157) > at > 
org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359) > at com.sun.proxy.$Proxy11.abandonBlock(Unknown Source) > at > org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1748) > at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:718) > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org