Hi,

This seems to be a known issue; see https://issues.apache.org/jira/browse/SPARK-4105
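If you want to check whether the failure is specific to the snappy codec, one thing you could try (just an experiment on my side, not a fix confirmed in that ticket) is switching the I/O compression codec used for shuffle blocks, e.g.:

    // Sketch only: swap the shuffle/block compression codec away from the
    // default (snappy) to see whether the FAILED_TO_UNCOMPRESS error persists.
    import org.apache.spark.{SparkConf, SparkContext}

    object CodecSwitchTest {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setAppName("codec-switch-test")
          // "lz4" and "lzf" are the other built-in codecs in Spark 1.6.x
          .set("spark.io.compression.codec", "lz4")
        val sc = new SparkContext(conf)
        // ... run the same shuffle-heavy job here ...
        sc.stop()
      }
    }

The same setting can also be passed on the command line with
spark-submit --conf spark.io.compression.codec=lz4 ... if you don't want to change the code.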
// maropu

On Sat, Sep 10, 2016 at 11:08 PM, 齐忠 <cente...@gmail.com> wrote:
> Hi all,
>
> When using the default compression codec (snappy), I get the following error while Spark is doing a shuffle:
>
> 16/09/09 08:33:15 ERROR executor.Executor: Managed memory leak detected; size = 89817648 bytes, TID = 20912
> 16/09/09 08:33:15 ERROR executor.Executor: Exception in task 63.2 in stage 1.0 (TID 20912)
> java.io.IOException: FAILED_TO_UNCOMPRESS(5)
>         at org.xerial.snappy.SnappyNative.throw_error(SnappyNative.java:98)
>         at org.xerial.snappy.SnappyNative.rawUncompress(Native Method)
>         at org.xerial.snappy.Snappy.rawUncompress(Snappy.java:465)
>         at org.xerial.snappy.Snappy.uncompress(Snappy.java:504)
>         at org.xerial.snappy.SnappyInputStream.readFully(SnappyInputStream.java:147)
>         at org.xerial.snappy.SnappyInputStream.readHeader(SnappyInputStream.java:99)
>         at org.xerial.snappy.SnappyInputStream.<init>(SnappyInputStream.java:59)
>         at org.apache.spark.io.SnappyCompressionCodec.compressedInputStream(CompressionCodec.scala:159)
>         at org.apache.spark.storage.BlockManager.wrapForCompression(BlockManager.scala:1186)
>         at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$2.apply(BlockStoreShuffleReader.scala:53)
>         at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$2.apply(BlockStoreShuffleReader.scala:52)
>         at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>         at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>         at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>         at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
>         at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>         at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:152)
>         at org.apache.spark.Aggregator.combineCombinersByKey(Aggregator.scala:58)
>         at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:83)
>         at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:98)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>         at org.apache.spark.scheduler.Task.run(Task.scala:89)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>         at java.lang.Thread.run(Thread.java:745)
>
> Env info:
>
> Spark on YARN (cluster mode)
>
> scalaVersion := "2.10.6"
> libraryDependencies += "org.apache.spark" %% "spark-core" % "1.6.0" % "provided"
> libraryDependencies += "org.apache.spark" %% "spark-mllib" % "1.6.0" % "provided"
>
> Thanks
>
> --
> cente...@gmail.com

--
---
Takeshi Yamamuro