[ https://issues.apache.org/jira/browse/SPARK-25316?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17236473#comment-17236473 ]
amit sharma commented on SPARK-25316: ------------------------------------- I am using Spark 2.3.3 with 16 workers with 30 cores each. I am facing a similar exception: Error cleaning broadcast 130038 ERROR ContextCleaner:91 - Error cleaning broadcast 130038 org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [240 seconds]. This timeout is controlled by spark.network.timeout at org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcTimeout.scala:47) at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:62) at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:58) at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:76) at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:92) at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:76) at org.apache.spark.storage.BlockManagerMaster.removeBroadcast(BlockManagerMaster.scala:148) at org.apache.spark.broadcast.TorrentBroadcast$.unpersist(TorrentBroadcast.scala:321) at org.apache.spark.broadcast.TorrentBroadcastFactory.unbroadcast(TorrentBroadcastFactory.scala:45) at org.apache.spark.broadcast.BroadcastManager.unbroadcast(BroadcastManager.scala:66) at org.apache.spark.ContextCleaner.doCleanupBroadcast(ContextCleaner.scala:238) at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$1.apply(ContextCleaner.scala:194) at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$an > Spark error - ERROR ContextCleaner: Error cleaning broadcast 22, Exception > thrown in awaitResult: > --------------------------------------------------------------------------------------------------- > > Key: SPARK-25316 > URL: https://issues.apache.org/jira/browse/SPARK-25316 > Project: Spark > Issue Type: Bug > 
Components: Java API > Affects Versions: 2.2.2 > Reporter: Vidya > Priority: Major > Labels: bulk-closed > > While running Spark load on EMR with c3 instances, we see the following error > ERROR ContextCleaner: Error cleaning broadcast 22 > org.apache.spark.SparkException: Exception thrown in awaitResult: > > What's the cause of the error and how do we fix it? > > Stage 30:=================================> (374 + 20) / 600] > [Stage 30:=====================================> (419 + 20) / 600] > [Stage 30:==========================================> (471 + 4) / > 600]18/08/02 21:06:09 ERROR TransportResponseHandler: Still have 1 requests > outstanding when connection from /10.154.21.145:45990 is closed > 18/08/02 21:06:09 ERROR ContextCleaner: Error cleaning broadcast 22 > org.apache.spark.SparkException: Exception thrown in awaitResult: > at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:205) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75) > at > org.apache.spark.storage.BlockManagerMaster.removeBroadcast(BlockManagerMaster.scala:161) > at > org.apache.spark.broadcast.TorrentBroadcast$.unpersist(TorrentBroadcast.scala:306) > at > org.apache.spark.broadcast.TorrentBroadcastFactory.unbroadcast(TorrentBroadcastFactory.scala:45) > at > org.apache.spark.broadcast.BroadcastManager.unbroadcast(BroadcastManager.scala:60) > at > org.apache.spark.ContextCleaner.doCleanupBroadcast(ContextCleaner.scala:238) > at > org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$1.apply(ContextCleaner.scala:194) > at > org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$1.apply(ContextCleaner.scala:185) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:185) > at 
org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279) > at > org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178) > at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73) > Caused by: java.io.IOException: Connection reset by peer > at sun.nio.ch.FileDispatcherImpl.read0(Native Method) > at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) > at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) > at sun.nio.ch.IOUtil.read(IOUtil.java:192) > at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380) > at > io.netty.buffer.PooledUnsafeDirectByteBuf.setBytes(PooledUnsafeDirectByteBuf.java:221) > at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:899) > at > io.netty.channel.socket.nio.NioSocketChannel.doReadBytes(NioSocketChannel.java:275) > at > io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:119) > at > io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643) > at > io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566) > at > io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480) > at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442) > at > io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131) > at > io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144) > at java.lang.Thread.run(Thread.java:748) -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org