[ https://issues.apache.org/jira/browse/SPARK-30853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yuming Wang updated SPARK-30853:
--------------------------------
Description:
{noformat}
org.apache.spark.SparkException: Error communicating with MapOutputTracker
at
org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:277)
at
org.apache.spark.MapOutputTrackerWorker.getStatuses(MapOutputTracker.scala:894)
at
org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:825)
at
org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:56)
at
org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:174)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:151)
at org.apache.spark.scheduler.Task.run(Task.scala:120)
at
org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:408)
at
org.apache.spark.executor.Executor$TaskRunner$$anon$3.run(Executor.scala:341)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at
org.apache.spark.executor.Executor$TaskRunner.withinTaskUGI(Executor.scala:340)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [300 seconds]. This timeout is controlled by spark.network.timeout
at
org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcTimeout.scala:47)
at
org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:62)
at
org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:58)
at
scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:76)
at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:92)
at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:76)
at
org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:273)
... 27 more
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [300 seconds]
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
at
scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:217)
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
... 30 more
{noformat}
was:
{noformat}
20/01/15 19:34:55,033 WARN [task-result-getter-0] scheduler.TaskSetManager:66 : Lost task 5356.0 in stage 535.1 (TID 2723448, hdc49-mcc10-01-0710-3609-012-tess0035.stratus.rno.ebay.com, executor 7459): FetchFailed(BlockManagerId(5622, hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com, 7337, None), shuffleId=158, mapId=689, reduceId=5964, message=
org.apache.spark.shuffle.FetchFailedException: Failed to connect to hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
at
org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:555)
at
org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:486)
at
org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:64)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at
org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
at
org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.agg_doAggregateWithKeys_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
at
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:49)
at
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$12$$anon$1.hasNext(WholeStageCodegenExec.scala:634)
at
org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:226)
at
org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:164)
at
org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:163)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:151)
at org.apache.spark.scheduler.Task.run(Task.scala:120)
at
org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:408)
at
org.apache.spark.executor.Executor$TaskRunner$$anon$3.run(Executor.scala:341)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at
org.apache.spark.executor.Executor$TaskRunner.withinTaskUGI(Executor.scala:340)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Failed to connect to hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
at
org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:250)
at
org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:192)
at
org.apache.spark.network.shuffle.ExternalShuffleClient.lambda$fetchBlocks$0(ExternalShuffleClient.java:100)
at
org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
at
org.apache.spark.network.shuffle.RetryingBlockFetcher.lambda$initiateRetry$0(RetryingBlockFetcher.java:169)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at
io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
... 1 more
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection timed out: hdc49-mcc10-01-0710-3903-022-tess0035.stratus.rno.ebay.com/10.18.158.175:7337
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
at
sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
at
io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:323)
at
io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340)
at
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:633)
at
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
at
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
at
io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
... 2 more
Caused by: java.net.ConnectException: Connection timed out
... 11 more
)
{noformat}
> Error communicating with MapOutputTracker issue
> -----------------------------------------------
>
> Key: SPARK-30853
> URL: https://issues.apache.org/jira/browse/SPARK-30853
> Project: Spark
> Issue Type: Improvement
> Components: Spark Core
> Affects Versions: 2.3.4
> Reporter: Yuming Wang
> Priority: Major
>
> {noformat}
> org.apache.spark.SparkException: Error communicating with MapOutputTracker
> at
> org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:277)
> at
> org.apache.spark.MapOutputTrackerWorker.getStatuses(MapOutputTracker.scala:894)
> at
> org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:825)
> at
> org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:56)
> at
> org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:174)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:59)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:325)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:289)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:151)
> at org.apache.spark.scheduler.Task.run(Task.scala:120)
> at
> org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:408)
> at
> org.apache.spark.executor.Executor$TaskRunner$$anon$3.run(Executor.scala:341)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
> at
> org.apache.spark.executor.Executor$TaskRunner.withinTaskUGI(Executor.scala:340)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [300 seconds]. This timeout is controlled by spark.network.timeout
> at
> org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcTimeout.scala:47)
> at
> org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:62)
> at
> org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:58)
> at
> scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
> at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:76)
> at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:92)
> at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:76)
> at
> org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:273)
> ... 27 more
> Caused by: java.util.concurrent.TimeoutException: Futures timed out after [300 seconds]
> at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
> at
> scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
> at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:217)
> at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
> ... 30 more
> {noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]