Hi all, I'm having some weird issues running a teragen on my 8-node cluster.
While running: hadoop jar hadoop-examples-1.0.1.jar teragen -Dmapred.map.tasks=280 10000000000 tera-in. I have 24 GB of RAM per node and 16 CPUs per node. I have configured 14 max map/reduce slots in mapred-site.xml, and replication is set to 1 right now; everything else is pretty much standard. I'm getting this error repeatedly, but the job still finishes — is there any reason for this? I have even more issues with terasort, but I'll save that for a different email. java.io.IOException: All datanodes 192.168.127.33:50010 are bad. Aborting... at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.processDatanodeError(DFSClient.java:3093) at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2200(DFSClient.java:2586) at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2790 attempt_201204082117_0001_m_000005_0: log4j:WARN No appenders could be found for logger (org.apache.hadoop.hdfs.DFSClient). attempt_201204082117_0001_m_000005_0: log4j:WARN Please initialize the log4j system properly. 
In the datanode logs it looks something like this: java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcher.write0(Native Method) at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:29) at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:69) at sun.nio.ch.IOUtil.write(IOUtil.java:40) at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:334) at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:55) at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:146) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:107) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:100) at java.io.DataOutputStream.writeShort(DataOutputStream.java:151) at org.apache.hadoop.hdfs.protocol.DataTransferProtocol$PipelineAck.write(DataTransferProtocol.java:136) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:890) at java.lang.Thread.run(Thread.java:662) 2012-04-08 20:05:50,933 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder blk_3108909130250839223_1646 0 Exception java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcher.write0(Native Method) at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:29) at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:69) at sun.nio.ch.IOUtil.write(IOUtil.java:40) at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:334) at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:55) at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:146) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:107) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:100) at 
java.io.DataOutputStream.writeShort(DataOutputStream.java:151) at org.apache.hadoop.hdfs.protocol.DataTransferProtocol$PipelineAck.write(DataTransferProtocol.java:136) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:890) at java.lang.Thread.run(Thread.java:662) 2012-04-08 20:05:50,933 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 0 for block blk_3108909130250839223_1646 terminating 2012-04-08 20:05:50,933 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: writeBlock blk_3108909130250839223_1646 received exception java.io.EOFException: while trying to read 65557 bytes 2012-04-08 20:05:50,934 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(192.168.127.40:50010, storageID=DS-611863444-192.168.127.40-50010-1333933147638, infoPort=50075, ipcPort=50020):DataXceiver java.io.EOFException: while trying to read 65557 bytes at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:268) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:312) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:376) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:532) at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:398) at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:107) at java.lang.Thread.run(Thread.java:662) 2012-04-08 20:05:51,085 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-9132421136669923382_1498 at file /scratch2/hdfs/data/current/subdir6/blk_-9132421136669923382 2012-04-08 20:05:51,103 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-7771124102901543621_1609 at file /scratch2/hdfs/data/current/subdir8/blk_-7771124102901543621 2012-04-08 20:05:51,106 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block 
blk_-7540353325435438996_1377 at file /scratch2/hdfs/data/current/blk_-7540353325435438996 2012-04-08 20:05:51,108 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-7134501298346216240_1486 at file /scratch2/hdfs/data/current/subdir6/blk_-7134501298346216240 2012-04-08 20:05:51,112 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-6669935170707692613_1538 at file /scratch2/hdfs/data/current/subdir7/blk_-6669935170707692613 2012-04-08 20:05:51,118 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-6596349132083909421_1570 at file /scratch2/hdfs/data/current/subdir8/blk_-6596349132083909421 2012-04-08 20:05:51,121 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-6007573181367846212_1478 at file /scratch2/hdfs/data/current/subdir6/blk_-6007573181367846212 2012-04-08 20:05:51,123 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted block blk_-3077738826613968428_1441 at file /scratch2/hdfs/data/current/subdir6/blk_-3077738826613968428 2012-04-08 20:05:51,135 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: IOException in BlockReceiver.run(): java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcher.write0(Native Method) at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:29) at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:69) at sun.nio.ch.IOUtil.write(IOUtil.java:40) at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:334) at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:55) at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:146) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:107) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:100) at java.io.DataOutputStream.writeShort(DataOutputStream.java:150) at 
org.apache.hadoop.hdfs.protocol.DataTransferProtocol$PipelineAck.write(DataTransferProtocol.java:136) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:890) at java.lang.Thread.run(Thread.java:662) 2012-04-08 20:05:51,135 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder blk_7384898103117276009_1646 1 Exception java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcher.write0(Native Method) at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:29) at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:69) at sun.nio.ch.IOUtil.write(IOUtil.java:40) at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:334) at org.apache.hadoop.net.SocketOutputStream$Writer.performIO(SocketOutputStream.java:55) at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:146) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:107) at org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:100) at java.io.DataOutputStream.writeShort(DataOutputStream.java:150) at org.apache.hadoop.hdfs.protocol.DataTransferProtocol$PipelineAck.write(DataTransferProtocol.java:136) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:890) at java.lang.Thread.run(Thread.java:662) 2012-04-08 20:05:51,136 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 1 for block blk_7384898103117276009_1646 terminating 2012-04-08 20:05:51,136 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Exception in receiveBlock for block blk_7384898103117276009_1646 java.nio.channels.ClosedByInterruptException 2012-04-08 20:05:51,136 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: writeBlock blk_7384898103117276009_1646 received exception java.nio.channels.ClosedByInterruptException 2012-04-08 20:05:51,136 ERROR 
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(192.168.127.40:50010, storageID=DS-611863444-192.168.127.40-50010-1333933147638, infoPort=50075, ipcPort=50020):DataXceiver java.nio.channels.ClosedByInterruptException at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:184) at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:270) at org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55) at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:155) at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:128) at java.io.BufferedInputStream.read1(BufferedInputStream.java:256) at java.io.BufferedInputStream.read(BufferedInputStream.java:317) at java.io.DataInputStream.read(DataInputStream.java:132) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:265) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:312) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:376) at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:532) at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:398) at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:107) at java.lang.Thread.run(Thread.java:662)