[ https://issues.apache.org/jira/browse/SPARK-15260?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Davies Liu resolved SPARK-15260. -------------------------------- Resolution: Fixed Fix Version/s: 2.0.0 > UnifiedMemoryManager could be in a bad state if any exception happens while > evicting blocks > ---------------------------------------------------------------------------------------- > > Key: SPARK-15260 > URL: https://issues.apache.org/jira/browse/SPARK-15260 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 1.6.0, 1.6.1, 2.0.0 > Reporter: Davies Liu > Assignee: Andrew Or > Fix For: 2.0.0 > > > {code} > Error: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 62 in stage 19.0 failed 4 times, most > recent failure: Lost task 62.3 in stage 19.0 (TID 2841, > ip-10-109-240-229.ec2.internal): java.io.IOException: > java.lang.AssertionError: assertion failed at > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1223) at > org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock( > TorrentBroadcast.scala:165) at > org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute( > TorrentBroadcast.scala:64) at > org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast. > scala:64) at > org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast. > scala:88) at > org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 71) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 46) at > org.apache.spark.scheduler.Task.run(Task.scala:96) at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:222) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor. > java:1142) at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor. 
> java:617) at > java.lang.Thread.run(Thread.java:745) Caused by: java.lang.AssertionError: > assertion failed at > scala.Predef$.assert(Predef.scala:165) at > org.apache.spark.memory.UnifiedMemoryManager.acquireStorageMemory( > UnifiedMemoryManager.scala:140) at > org.apache.spark.storage.MemoryStore.tryToPut(MemoryStore.scala:387) at > org.apache.spark.storage.MemoryStore.tryToPut(MemoryStore.scala:346) at > org.apache.spark.storage.MemoryStore.putBytes(MemoryStore.scala:99) at > org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:803) at > org.apache.spark.storage.BlockManager.putBytes(BlockManager.scala:690) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$or$$$$ > 1c5ab38dcb7d9b112f54b116debbe7f$$$$cast$$anonfun$$getRemote$1$1.apply( > TorrentBroadcast.scala:130) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$or$$$$ > 1c5ab38dcb7d9b112f54b116debbe7f$$$$cast$$anonfun$$getRemote$1$1.apply( > TorrentBroadcast.scala:127) at > scala.Option.map(Option.scala:145) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.org$apache$spark$broadcast$ > TorrentBroadcast$$anonfun$$getRemote$1(TorrentBroadcast.scala:127) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$1.apply( > TorrentBroadcast.scala:137) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$1.apply( > TorrentBroadcast.scala:137) at > scala.Option.orElse(Option.scala:257) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply$mcVI$sp(TorrentBroadcast. 
> scala:137) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala: 120) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala: 120) at > scala.collection.immutable.List.foreach(List.scala:318) at > org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$ > TorrentBroadcast$$readBlocks(TorrentBroadcast.scala:120) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$ > 1.apply(TorrentBroadcast.scala:175) at > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1220) ... 12 more > Driver stacktrace: at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$ > DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply( > DAGScheduler.scala:1419) at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply( > DAGScheduler.scala:1418) at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray. > scala:59) at > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala: 1418) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1. > apply(DAGScheduler.scala:799) at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1. 
> apply(DAGScheduler.scala:799) at > scala.Option.foreach(Option.scala:236) at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler > .scala:799) at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive( > DAGScheduler.scala:1640) at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive( > DAGScheduler.scala:1599) at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive( > DAGScheduler.scala:1588) at > org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) at > org.apache.spark.SparkContext.runJob(SparkContext.scala:1837) at > org.apache.spark.SparkContext.runJob(SparkContext.scala:1850) at > org.apache.spark.SparkContext.runJob(SparkContext.scala:1863) at > org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:212 ) at > org.apache.spark.sql.execution.Limit.executeCollect(basicOperators.scala > :165) at > org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan. 
> scala:174) at > org.apache.spark.sql.DataFrame$$anonfun$org$apache$spark$sql$DataFrame$$ > execute$1$1.apply(DataFrame.scala:1499) at > org.apache.spark.sql.DataFrame$$anonfun$org$apache$spark$sql$DataFrame$$ > execute$1$1.apply(DataFrame.scala:1499) at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId( > SQLExecution.scala:56) at > org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:2086) at > org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$execute$1 > (DataFrame.scala:1498) at > org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$collect( > DataFrame.scala:1505) at > org.apache.spark.sql.DataFrame$$anonfun$head$1.apply(DataFrame.scala: 1375) at > org.apache.spark.sql.DataFrame$$anonfun$head$1.apply(DataFrame.scala: 1374) at > org.apache.spark.sql.DataFrame.withCallback(DataFrame.scala:2099) at > org.apache.spark.sql.DataFrame.head(DataFrame.scala:1374) at > org.apache.spark.sql.DataFrame.take(DataFrame.scala:1456) > at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:170) at > org.apache.spark.sql.DataFrame.show(DataFrame.scala:350) at > org.apache.spark.sql.DataFrame.show(DataFrame.scala:311) at > org.apache.spark.sql.DataFrame.show(DataFrame.scala:319) Caused by: > java.io.IOException: java.lang.AssertionError: > assertion failed at > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1223) at > org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock( > TorrentBroadcast.scala:165) at > org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute( > TorrentBroadcast.scala:64) at > org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast. > scala:64) at > org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast. 
> scala:88) at > org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 71) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 46) at > org.apache.spark.scheduler.Task.run(Task.scala:96) at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:222) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor. > java:1142) at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor. > java:617) at > java.lang.Thread.run(Thread.java:745) Caused by: java.lang.AssertionError: > assertion failed at > scala.Predef$.assert(Predef.scala:165) at > org.apache.spark.memory.UnifiedMemoryManager.acquireStorageMemory( > UnifiedMemoryManager.scala:140) at > org.apache.spark.storage.MemoryStore.tryToPut(MemoryStore.scala:387) at > org.apache.spark.storage.MemoryStore.tryToPut(MemoryStore.scala:346) at > org.apache.spark.storage.MemoryStore.putBytes(MemoryStore.scala:99) at > org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:803) at > org.apache.spark.storage.BlockManager.putBytes(BlockManager.scala:690) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$or$$$$ > 1c5ab38dcb7d9b112f54b116debbe7f$$$$cast$$anonfun$$getRemote$1$1.apply( > TorrentBroadcast.scala:130) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$or$$$$ > 1c5ab38dcb7d9b112f54b116debbe7f$$$$cast$$anonfun$$getRemote$1$1.apply( > TorrentBroadcast.scala:127) at > scala.Option.map(Option.scala:145) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.org$apache$spark$broadcast$ > TorrentBroadcast$$anonfun$$getRemote$1(TorrentBroadcast.scala:127) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$1.apply( > TorrentBroadcast.scala:137) at > 
org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$1.apply( > TorrentBroadcast.scala:137) at > scala.Option.orElse(Option.scala:257) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply$mcVI$sp(TorrentBroadcast. > scala:137) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala: 120) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$ > broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala: 120) at > scala.collection.immutable.List.foreach(List.scala:318) at > org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$ > TorrentBroadcast$$readBlocks(TorrentBroadcast.scala:120) at > org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$ > 1.apply(TorrentBroadcast.scala:175) at > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1220) at > org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock( > TorrentBroadcast.scala:165) at > org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute( > TorrentBroadcast.scala:64) at > org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast. > scala:64) at > org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast. > scala:88) at > org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 71) at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala: 46) at > org.apache.spark.scheduler.Task.run(Task.scala:96) at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:222) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor. > java:1142) at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor. 
> java:617) at > java.lang.Thread.run(Thread.java:745) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org