Hello,

when I try to merge segment using ...

bin/nutch mergesegs /user/nutch/crawl_al/MERGEDsegments -dir 
/user/nutch/my_crawl/segments -filter -slice 50000


.... I get the following error. What I'm doing wrong?

Thanks
Patricio

java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

attempt_201110152026_0003_r_000001_1: log4j:WARN No appenders could be found 
for logger (org.apache.hadoop.hdfs.DFSClient).
attempt_201110152026_0003_r_000001_1: log4j:WARN Please initialize the log4j 
system properly.
java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

attempt_201110152026_0003_r_000000_1: log4j:WARN No appenders could be found 
for logger (org.apache.hadoop.hdfs.DFSClient).
attempt_201110152026_0003_r_000000_1: log4j:WARN Please initialize the log4j 
system properly.
java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.io.Text.readString(Text.java:400)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

Exception in thread "main" java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
at org.apache.nutch.segment.SegmentMerger.merge(SegmentMerger.java:638)
at org.apache.nutch.segment.SegmentMerger.main(SegmentMerger.java:683

Reply via email to