[
https://issues.apache.org/jira/browse/FLINK-19426?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17203944#comment-17203944
]
Arvid Heise edited comment on FLINK-19426 at 9/29/20, 2:05 PM:
---------------------------------------------------------------
I think that the exception in the description is expected: the test kills a TM
and then no connection cannot be established.
What seems to be the actual issue is that the newly added TM is not properly
registered.
{noformat}
2020-09-26 22:08:52,870 ERROR
org.apache.flink.runtime.taskexecutor.TaskExecutor [] - Fatal error
occurred in TaskExecutor
akka.ssl.tcp://[email protected]:44909/user/rpc/taskmanager_0.
org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException:
Could not register at the ResourceManager within the specified maximum
registration duration 300000 ms. This indicates a problem with this instance.
Terminating now.
at
org.apache.flink.runtime.taskexecutor.TaskExecutor.registrationTimeout(TaskExecutor.java:1256)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.taskexecutor.TaskExecutor.lambda$startRegistrationTimeout$18(TaskExecutor.java:1242)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]{noformat}
leading to the application eventually failing
{noformat}
2020-09-26 22:03:08,521 ERROR
org.apache.flink.runtime.util.FatalExitExceptionHandler [] - FATAL: Thread
'flink-akka.actor.default-dispatcher-17' produced an uncaught exception.
Stopping the process...
java.util.concurrent.CompletionException:
java.util.concurrent.CompletionException: java.lang.IllegalStateException:
Could not assign resource
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) -
[SCHEDULED].
at
org.apache.flink.runtime.scheduler.DefaultScheduler.propagateIfNonNull(DefaultScheduler.java:423)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$deployAll$5(DefaultScheduler.java:408)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168)
~[?:1.8.0_265]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.waitForAllSlotsAndDeploy(DefaultScheduler.java:391)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.allocateSlotsAndDeploy(DefaultScheduler.java:344)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.maybeScheduleRegion(PipelinedRegionSchedulingStrategy.java:143)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.maybeScheduleRegions(PipelinedRegionSchedulingStrategy.java:128)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.restartTasks(PipelinedRegionSchedulingStrategy.java:100)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$restartTasks$2(DefaultScheduler.java:290)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
java.util.concurrent.CompletableFuture.uniRun(CompletableFuture.java:719)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture$UniRun.tryFire(CompletableFuture.java:701)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
~[?:1.8.0_265]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
Caused by: java.util.concurrent.CompletionException:
java.lang.IllegalStateException: Could not assign resource
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) -
[SCHEDULED].
at
java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:838)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168)
~[?:1.8.0_265]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.assignAllResources(DefaultScheduler.java:400)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
... 31 more
Caused by: java.lang.IllegalStateException: Could not assign resource
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) -
[SCHEDULED].
at
org.apache.flink.runtime.executiongraph.ExecutionVertex.tryAssignResource(ExecutionVertex.java:701)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$assignResourceOrHandleError$6(DefaultScheduler.java:445)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
~[?:1.8.0_265]
at
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168)
~[?:1.8.0_265]
at
org.apache.flink.runtime.scheduler.DefaultScheduler.assignAllResources(DefaultScheduler.java:400)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
... 31 more{noformat}
was (Author: aheise):
Root cause of the exception
{noformat}
2020-09-26 22:02:04,149 WARN org.apache.flink.runtime.taskmanager.Task
[] - Sink: Unnamed (4/4) (0a448493b4782967b150582570326227_3_0)
switched from RUNNING to FAILED.
org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
Connection unexpectedly closed by remote task manager
'10.1.0.4/10.1.0.4:38905'. This might indicate that the remote task manager was
lost.
at
org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.channelInactive(CreditBasedPartitionRequestClientHandler.java:144)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.NettyMessageClientDecoderDelegate.channelInactive(NettyMessageClientDecoderDelegate.java:97)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelInputClosed(ByteToMessageDecoder.java:393)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelInactive(ByteToMessageDecoder.java:358)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.handler.ssl.SslHandler.channelInactive(SslHandler.java:1075)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1416)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:912)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:816)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:416)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:331)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:918)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_265]
{noformat}
> End-to-end test sometimes fails with PartitionConnectionException
> -----------------------------------------------------------------
>
> Key: FLINK-19426
> URL: https://issues.apache.org/jira/browse/FLINK-19426
> Project: Flink
> Issue Type: Bug
> Components: Runtime / Network, Tests
> Affects Versions: 1.12.0
> Reporter: Dian Fu
> Priority: Major
> Labels: test-stability
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=6983&view=logs&j=68a897ab-3047-5660-245a-cce8f83859f6&t=16ca2cca-2f63-5cce-12d2-d519b930a729
> {code}
> 2020-09-26T22:16:26.9856525Z
> org.apache.flink.runtime.io.network.partition.consumer.PartitionConnectionException:
> Connection for partition
> 619775973ed0f282e20f9d55d13913ab#0@bc764cd8ddf7a0cff126f51c16239658_0_1 not
> reachable.
> 2020-09-26T22:16:26.9857848Z at
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:159)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9859168Z at
> org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.internalRequestPartitions(SingleInputGate.java:336)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9860449Z at
> org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.requestPartitions(SingleInputGate.java:308)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9861677Z at
> org.apache.flink.runtime.taskmanager.InputGateWithMetrics.requestPartitions(InputGateWithMetrics.java:95)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9862861Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.requestPartitions(StreamTask.java:542)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9864018Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.readRecoveredChannelState(StreamTask.java:507)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9865284Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$beforeInvoke$0(StreamTask.java:498)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9866415Z at
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:47)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9867500Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.beforeInvoke(StreamTask.java:492)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9868514Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:550)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9869450Z at
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:722)
> [flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9870339Z at
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:547)
> [flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9870869Z at java.lang.Thread.run(Thread.java:748)
> [?:1.8.0_265]
> 2020-09-26T22:16:26.9872060Z Caused by: java.io.IOException:
> java.util.concurrent.ExecutionException:
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9873511Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:85)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9874788Z at
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9876084Z at
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9876567Z ... 12 more
> 2020-09-26T22:16:26.9877477Z Caused by:
> java.util.concurrent.ExecutionException:
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9878503Z at
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> ~[?:1.8.0_265]
> 2020-09-26T22:16:26.9879061Z at
> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
> ~[?:1.8.0_265]
> 2020-09-26T22:16:26.9880244Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:83)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9884461Z at
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9885737Z at
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9886304Z ... 12 more
> 2020-09-26T22:16:26.9887211Z Caused by:
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9888456Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:122)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9889704Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:101)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9891028Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.lambda$createPartitionRequestClient$1(PartitionRequestClientFactory.java:78)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9892193Z at
> org.apache.flink.runtime.concurrent.FutureUtils.completeFromCallable(FutureUtils.java:87)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9893396Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:78)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9894646Z at
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9895718Z at
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9896201Z ... 12 more
> 2020-09-26T22:16:26.9896424Z Caused by: java.lang.NullPointerException
> 2020-09-26T22:16:26.9897066Z at
> org.apache.flink.util.Preconditions.checkNotNull(Preconditions.java:58)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9898008Z at
> org.apache.flink.runtime.io.network.netty.NettyPartitionRequestClient.<init>(NettyPartitionRequestClient.java:73)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9899040Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:116)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9900118Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:101)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9901443Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.lambda$createPartitionRequestClient$1(PartitionRequestClientFactory.java:78)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9902613Z at
> org.apache.flink.runtime.concurrent.FutureUtils.completeFromCallable(FutureUtils.java:87)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9904043Z at
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:78)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9905404Z at
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9906893Z at
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9907510Z ... 12 more
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)