[ 
https://issues.apache.org/jira/browse/MAPREDUCE-4801?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13498300#comment-13498300
 ] 

Jason Lowe commented on MAPREDUCE-4801:
---------------------------------------

Some sample exceptions:

{noformat}
2012-11-15 12:47:02,365 [New I/O server worker #1-14] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error: 
java.io.IOException: Connection reset by peer
        at sun.nio.ch.FileDispatcher.read0(Native Method)
        at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
        at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:237)
        at sun.nio.ch.IOUtil.read(IOUtil.java:204)
        at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
        at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:321)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.processSelectedKeys(NioWorker.java:280)
        at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:200)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:619)
2012-11-15 12:47:02,366 [New I/O server worker #1-14] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error [id: 0x01901fc1, 
/xx.xx.xx.xx:xx => /xx.xx.xx.xx:xx] EXCEPTION: java.io.IOException: Connection 
reset by peer
2012-11-15 12:47:02,366 [New I/O server worker #1-14] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error: 
java.nio.channels.ClosedChannelException
        at 
org.jboss.netty.channel.socket.nio.NioWorker.cleanUpWriteBuffer(NioWorker.java:616)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.close(NioWorker.java:592)
        at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:355)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.processSelectedKeys(NioWorker.java:280)
        at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:200)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:619)
...
2012-11-15 12:47:02,367 [New I/O server worker #1-15] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error: 
java.io.IOException: Broken pipe
        at sun.nio.ch.FileDispatcher.write0(Native Method)
        at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:29)
        at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:100)
        at sun.nio.ch.IOUtil.write(IOUtil.java:56)
        at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:334)
        at 
org.jboss.netty.channel.socket.nio.SocketSendBufferPool$PooledSendBuffer.transferTo(SocketSendBufferPool.java:239)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.write0(NioWorker.java:469)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.writeFromUserCode(NioWorker.java:387)
        at 
org.jboss.netty.channel.socket.nio.NioServerSocketPipelineSink.handleAcceptedSocket(NioServerSocketPipelineSink.java:137)
        at 
org.jboss.netty.channel.socket.nio.NioServerSocketPipelineSink.eventSunk(NioServerSocketPipelineSink.java:76)
        at 
org.jboss.netty.handler.codec.oneone.OneToOneEncoder.handleDownstream(OneToOneEncoder.java:68)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.flush(ChunkedWriteHandler.java:255)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.handleDownstream(ChunkedWriteHandler.java:124)
        at org.jboss.netty.channel.Channels.write(Channels.java:611)
        at org.jboss.netty.channel.Channels.write(Channels.java:578)
        at 
org.jboss.netty.channel.AbstractChannel.write(AbstractChannel.java:259)
        at 
org.apache.hadoop.mapred.ShuffleHandler$Shuffle.sendMapOutput(ShuffleHandler.java:477)
        at 
org.apache.hadoop.mapred.ShuffleHandler$Shuffle.messageReceived(ShuffleHandler.java:397)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.handleUpstream(ChunkedWriteHandler.java:148)
        at 
org.jboss.netty.handler.codec.http.HttpChunkAggregator.messageReceived(HttpChunkAggregator.java:116)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:302)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.unfoldAndfireMessageReceived(ReplayingDecoder.java:522)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:506)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:443)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:274)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:261)
        at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:349)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.processSelectedKeys(NioWorker.java:280)
        at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:200)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:619)
2012-11-15 12:47:02,367 [New I/O server worker #1-15] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error [id: 0x01fd225b, 
/xx.xx.xx.xx:xx => /xx.xx.xx.xx:xx] EXCEPTION: java.io.IOException: Broken pipe
2012-11-15 12:47:02,367 [New I/O server worker #1-15] ERROR 
org.apache.hadoop.mapred.ShuffleHandler: Shuffle error: 
java.nio.channels.ClosedChannelException
        at 
org.jboss.netty.channel.socket.nio.NioWorker.cleanUpWriteBuffer(NioWorker.java:636)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.close(NioWorker.java:592)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.write0(NioWorker.java:512)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.writeFromUserCode(NioWorker.java:387)
        at 
org.jboss.netty.channel.socket.nio.NioServerSocketPipelineSink.handleAcceptedSocket(NioServerSocketPipelineSink.java:137)
        at 
org.jboss.netty.channel.socket.nio.NioServerSocketPipelineSink.eventSunk(NioServerSocketPipelineSink.java:76)
        at 
org.jboss.netty.handler.codec.oneone.OneToOneEncoder.handleDownstream(OneToOneEncoder.java:68)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.flush(ChunkedWriteHandler.java:255)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.handleDownstream(ChunkedWriteHandler.java:124)
        at org.jboss.netty.channel.Channels.write(Channels.java:611)
        at org.jboss.netty.channel.Channels.write(Channels.java:578)
        at 
org.jboss.netty.channel.AbstractChannel.write(AbstractChannel.java:259)
        at 
org.apache.hadoop.mapred.ShuffleHandler$Shuffle.sendMapOutput(ShuffleHandler.java:477)
        at 
org.apache.hadoop.mapred.ShuffleHandler$Shuffle.messageReceived(ShuffleHandler.java:397)
        at 
org.jboss.netty.handler.stream.ChunkedWriteHandler.handleUpstream(ChunkedWriteHandler.java:148)
        at 
org.jboss.netty.handler.codec.http.HttpChunkAggregator.messageReceived(HttpChunkAggregator.java:116)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:302)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.unfoldAndfireMessageReceived(ReplayingDecoder.java:522)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:506)
        at 
org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:443)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:274)
        at 
org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:261)
        at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:349)
        at 
org.jboss.netty.channel.socket.nio.NioWorker.processSelectedKeys(NioWorker.java:280)
        at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:200)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:619)
{noformat}
                
> ShuffleHandler can generate large logs due to prematurely closed channels
> -------------------------------------------------------------------------
>
>                 Key: MAPREDUCE-4801
>                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-4801
>             Project: Hadoop Map/Reduce
>          Issue Type: Bug
>    Affects Versions: 0.23.3, 2.0.1-alpha
>            Reporter: Jason Lowe
>            Priority: Critical
>
> We ran into an instance where many nodes on a cluster ran out of disk space 
> because the nodemanager logs were huge.  Examining the logs showed many, many 
> shuffle errors due to either ClosedChannelException or IOException from 
> "Connection reset by peer" or "Broken pipe".

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to