[ https://issues.apache.org/jira/browse/SPARK-30853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Yuming Wang updated SPARK-30853:
--------------------------------
    Description: 
{noformat}
org.apache.spark.SparkException: Error communicating with MapOutputTracker
        at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:277)
        at org.apache.spark.MapOutputTrackerWorker.getStatuses(MapOutputTracker.scala:894)
        at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:825)
        at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:56)
        at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:174)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:151)
        at org.apache.spark.scheduler.Task.run(Task.scala:120)
        at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:408)
        at org.apache.spark.executor.Executor$TaskRunner$$anon$3.run(Executor.scala:341)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
        at org.apache.spark.executor.Executor$TaskRunner.withinTaskUGI(Executor.scala:340)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [300 seconds]. This timeout is controlled by spark.network.timeout
        at org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcTimeout.scala:47)
        at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:62)
        at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:58)
        at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
        at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:76)
        at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:92)
        at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:76)
        at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:273)
        ... 27 more
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [300 seconds]
        at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
        at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
        at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:217)
        at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
        ... 30 more
{noformat}
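
The RpcTimeoutException above names the relevant knob directly: the 300-second limit comes from spark.network.timeout (spark.rpc.askTimeout falls back to it when unset). A minimal mitigation sketch, assuming the driver-side MapOutputTracker is merely slow to answer rather than dead; the 600s value is illustrative, not a recommendation from this ticket:

{noformat}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Hedged sketch, not a fix proposed in this ticket: lengthen the RPC
// timeout that MapOutputTracker.askTracker runs under. spark.rpc.askTimeout
// falls back to spark.network.timeout when unset; 600s is illustrative.
val conf = new SparkConf()
  .set("spark.network.timeout", "600s")

val spark = SparkSession.builder()
  .config(conf)
  .getOrCreate()
{noformat}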


  was:

{noformat}
20/01/15 19:34:55,033 WARN [task-result-getter-0] scheduler.TaskSetManager:66 : Lost task 5356.0 in stage 535.1 (TID 2723448, hdc49-mcc10-01-0710-3609-012-tess0035.stratus.rno.ebay.com, executor 7459): FetchFailed(BlockManagerId(5622, hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com, 7337, None), shuffleId=158, mapId=689, reduceId=5964, message=
org.apache.spark.shuffle.FetchFailedException: Failed to connect to hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
        at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:555)
        at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:486)
        at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:64)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
        at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
        at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.agg_doAggregateWithKeys_0$(Unknown Source)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
        at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:49)
        at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$12$$anon$1.hasNext(WholeStageCodegenExec.scala:634)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:226)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:164)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:163)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:151)
        at org.apache.spark.scheduler.Task.run(Task.scala:120)
        at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:408)
        at org.apache.spark.executor.Executor$TaskRunner$$anon$3.run(Executor.scala:341)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
        at org.apache.spark.executor.Executor$TaskRunner.withinTaskUGI(Executor.scala:340)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Failed to connect to hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
        at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:250)
        at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:192)
        at org.apache.spark.network.shuffle.ExternalShuffleClient.lambda$fetchBlocks$0(ExternalShuffleClient.java:100)
        at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
        at org.apache.spark.network.shuffle.RetryingBlockFetcher.lambda$initiateRetry$0(RetryingBlockFetcher.java:169)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
        ... 1 more
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection timed out: hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
        at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
        at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
        at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:323)
        at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340)
        at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:633)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
        at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
        ... 2 more
Caused by: java.net.ConnectException: Connection timed out
        ... 11 more

)
{noformat}
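
The earlier description, kept above for reference, shows the other common shape of this failure: a connect timeout to the external shuffle service (7337 is the default spark.shuffle.service.port). A hedged sketch of the retry settings behind the RetryingBlockFetcher frames in that trace; the values are illustrative only:

{noformat}
import org.apache.spark.SparkConf

// Hedged sketch only: shuffle-fetch retry knobs on the
// RetryingBlockFetcher path seen above. Values are illustrative.
val conf = new SparkConf()
  .set("spark.shuffle.io.maxRetries", "10") // default: 3 attempts
  .set("spark.shuffle.io.retryWait", "30s") // default: 5s between attempts
{noformat}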



> Error communicating with MapOutputTracker issue
> -----------------------------------------------
>
>                 Key: SPARK-30853
>                 URL: https://issues.apache.org/jira/browse/SPARK-30853
>             Project: Spark
>          Issue Type: Improvement
>          Components: Spark Core
>    Affects Versions: 2.3.4
>            Reporter: Yuming Wang
>            Priority: Major
>


