Hey all I'm using Spark-1.6.1 and occasionally seeing executors lost and hurting my application performance due to these errors. Can someone please let out all the possible problems that could cause this?
Full log: 16/05/19 02:17:54 ERROR ContextCleaner: Error cleaning broadcast 266685 org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [120 seconds]. This timeout is controlled by spark.rpc.askTimeout at org.apache.spark.rpc.RpcTimeout.org $apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcEnv.scala:214) at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcEnv.scala:229) at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcEnv.scala:225) at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcEnv.scala:242) at org.apache.spark.storage.BlockManagerMaster.removeBroadcast(BlockManagerMaster.scala:136) at org.apache.spark.broadcast.TorrentBroadcast$.unpersist(TorrentBroadcast.scala:228) at org.apache.spark.broadcast.TorrentBroadcastFactory.unbroadcast(TorrentBroadcastFactory.scala:45) at org.apache.spark.broadcast.BroadcastManager.unbroadcast(BroadcastManager.scala:67) at org.apache.spark.ContextCleaner.doCleanupBroadcast(ContextCleaner.scala:214) at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$2.apply(ContextCleaner.scala:170) at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$2.apply(ContextCleaner.scala:161) at scala.Option.foreach(Option.scala:257) at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:161) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1136) at org.apache.spark.ContextCleaner.org $apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:154) at org.apache.spark.ContextCleaner$$anon$3.run(ContextCleaner.scala:67) Caused by: java.util.concurrent.TimeoutException: Futures timed out after [120 seconds] at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:190) at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) at scala.concurrent.Await$.result(package.scala:190) at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcEnv.scala:241) ... 12 more 16/05/19 02:18:26 ERROR TaskSchedulerImpl: Lost executor 20160421-192532-1677787146-5050-40596-S23 on ip-10-0-1-70.ec2.internal: Executor heartbeat timed out after 161447 ms 16/05/19 02:18:53 ERROR TaskSchedulerImpl: Lost executor 20160421-192532-1677787146-5050-40596-S23 on ip-10-0-1-70.ec2.internal: remote Rpc client disassociated Thanks Sahil