For more info, below is the dump from the OutOfMemoryError:

"Thread-347" prio=5 tid=390 RUNNABLE
    at org.apache.hadoop.mapred.IFile$Reader.readNextBlock(IFile.java:342)
    at org.apache.hadoop.mapred.IFile$Reader.next(IFile.java:404)
        Local Variable: org.apache.hadoop.io.DataInputBuffer#7
        Local Variable: org.apache.hadoop.io.DataInputBuffer#6
        Local Variable: org.apache.hadoop.mapred.IFile$Reader#3
    at org.apache.hadoop.mapred.Merger$Segment.next(Merger.java:220)
    at org.apache.hadoop.mapred.Merger$MergeQueue.adjustPriorityQueue(Merger.java:330)
        Local Variable: org.apache.hadoop.mapred.Merger$Segment#2
    at org.apache.hadoop.mapred.Merger$MergeQueue.next(Merger.java:350)
    at org.apache.hadoop.mapred.Merger.writeFile(Merger.java:156)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.mergeParts(MapTask.java:1535)
        Local Variable: org.apache.hadoop.mapred.IFile$Writer#1
        Local Variable: org.apache.hadoop.mapred.SpillRecord#1
        Local Variable: org.apache.hadoop.mapred.IndexRecord#1
        Local Variable: org.apache.hadoop.fs.Path[]#2
        Local Variable: org.apache.hadoop.fs.Path#30
        Local Variable: org.apache.hadoop.fs.FSDataOutputStream#1
        Local Variable: org.apache.hadoop.fs.Path#29
        Local Variable: org.apache.hadoop.mapred.Merger$MergeQueue#1
        Local Variable: java.util.ArrayList#15453
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:1154)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:359)
        Local Variable: org.apache.hadoop.mapred.MapTask$MapOutputBuffer#1
        Local Variable: org.apache.hadoop.io.DataInputBuffer#3
        Local Variable: org.apache.hadoop.mapred.FileSplit#1
        Local Variable: org.apache.hadoop.io.BytesWritable#1
        Local Variable: org.apache.hadoop.mapred.MapTask$TrackedRecordReader#1
        Local Variable: org.apache.hadoop.mapred.SequenceFileRecordReader#1
        Local Variable: org.apache.nutch.fetcher.Fetcher#2
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307)
        Local Variable: org.apache.hadoop.mapred.Task$TaskReporter#1
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177)
        Local Variable: org.apache.hadoop.mapred.TaskAttemptID#1
        Local Variable: org.apache.hadoop.mapred.FileOutputCommitter#1
        Local Variable: org.apache.hadoop.mapred.JobClient$RawSplit[]#1
        Local Variable: org.apache.hadoop.mapred.JobContext#1
        Local Variable: org.apache.hadoop.mapred.MapTask#1
        Local Variable: org.apache.hadoop.mapred.JobConf#11
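One note on reading the trace: the LocalJobRunner$Job.run frame at the bottom means the map task (and the merge in MapOutputBuffer.mergeParts where the allocation failed) is running inside the same JVM as the Nutch client process, so as far as I know it is that JVM's -Xmx that matters here, not mapred.child.java.opts. A quick way to confirm how much heap the merge actually had to work with is a one-off check like the sketch below (hypothetical class name, not part of Nutch or Hadoop; just prints the limits of whatever JVM you launch it in):

    // Minimal sketch: print the heap ceiling of the JVM that would run
    // LocalJobRunner tasks. Run it with the same java options you use for Nutch.
    public class HeapCheck {
        public static void main(String[] args) {
            Runtime rt = Runtime.getRuntime();
            long mb = 1024L * 1024L;
            System.out.println("Max heap (-Xmx) : " + (rt.maxMemory() / mb) + " MB");
            System.out.println("Total allocated : " + (rt.totalMemory() / mb) + " MB");
            System.out.println("Currently free  : " + (rt.freeMemory() / mb) + " MB");
        }
    }

If the reported max heap is small, raising the heap of the launching JVM should at least rule the local-mode limit in or out as the cause.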