[ 
https://issues.apache.org/jira/browse/FLINK-19426?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17203944#comment-17203944
 ] 

Arvid Heise edited comment on FLINK-19426 at 9/29/20, 2:05 PM:
---------------------------------------------------------------

I think that the exception in the description is expected: the test kills a TM 
and then no connection can be established.

What seems to be the actual issue is that the newly added TM is not properly 
registered.
{noformat}
2020-09-26 22:08:52,870 ERROR 
org.apache.flink.runtime.taskexecutor.TaskExecutor           [] - Fatal error 
occurred in TaskExecutor 
akka.ssl.tcp://[email protected]:44909/user/rpc/taskmanager_0.
org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException: 
Could not register at the ResourceManager within the specified maximum 
registration duration 300000 ms. This indicates a problem with this instance. 
Terminating now.
        at 
org.apache.flink.runtime.taskexecutor.TaskExecutor.registrationTimeout(TaskExecutor.java:1256)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.taskexecutor.TaskExecutor.lambda$startRegistrationTimeout$18(TaskExecutor.java:1242)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.Actor$class.aroundReceive(Actor.scala:517) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.ActorCell.invoke(ActorCell.scala:561) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.run(Mailbox.scala:225) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.exec(Mailbox.scala:235) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]{noformat}
leading to the application eventually failing:
{noformat}
2020-09-26 22:03:08,521 ERROR 
org.apache.flink.runtime.util.FatalExitExceptionHandler      [] - FATAL: Thread 
'flink-akka.actor.default-dispatcher-17' produced an uncaught exception. 
Stopping the process...
java.util.concurrent.CompletionException: 
java.util.concurrent.CompletionException: java.lang.IllegalStateException: 
Could not assign resource 
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to 
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) - 
[SCHEDULED].
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.propagateIfNonNull(DefaultScheduler.java:423)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$deployAll$5(DefaultScheduler.java:408)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836) 
~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168) 
~[?:1.8.0_265]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.waitForAllSlotsAndDeploy(DefaultScheduler.java:391)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.allocateSlotsAndDeploy(DefaultScheduler.java:344)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.maybeScheduleRegion(PipelinedRegionSchedulingStrategy.java:143)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.maybeScheduleRegions(PipelinedRegionSchedulingStrategy.java:128)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.strategy.PipelinedRegionSchedulingStrategy.restartTasks(PipelinedRegionSchedulingStrategy.java:100)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$restartTasks$2(DefaultScheduler.java:290)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
java.util.concurrent.CompletableFuture.uniRun(CompletableFuture.java:719) 
~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture$UniRun.tryFire(CompletableFuture.java:701)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
 ~[?:1.8.0_265]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.Actor$class.aroundReceive(Actor.scala:517) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.actor.ActorCell.invoke(ActorCell.scala:561) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.run(Mailbox.scala:225) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.Mailbox.exec(Mailbox.scala:235) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) 
[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
Caused by: java.util.concurrent.CompletionException: 
java.lang.IllegalStateException: Could not assign resource 
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to 
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) - 
[SCHEDULED].
        at 
java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:838) 
~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168) 
~[?:1.8.0_265]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.assignAllResources(DefaultScheduler.java:400)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        ... 31 more
Caused by: java.lang.IllegalStateException: Could not assign resource 
org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@7d7fbd5 to 
current execution Attempt #3 (Source: Custom Source (1/1)) @ (unassigned) - 
[SCHEDULED].
        at 
org.apache.flink.runtime.executiongraph.ExecutionVertex.tryAssignResource(ExecutionVertex.java:701)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$assignResourceOrHandleError$6(DefaultScheduler.java:445)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836) 
~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:848)
 ~[?:1.8.0_265]
        at 
java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2168) 
~[?:1.8.0_265]
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.assignAllResources(DefaultScheduler.java:400)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        ... 31 more{noformat}


was (Author: aheise):
Root cause of the exception


{noformat}
2020-09-26 22:02:04,149 WARN  org.apache.flink.runtime.taskmanager.Task         
           [] - Sink: Unnamed (4/4) (0a448493b4782967b150582570326227_3_0) 
switched from RUNNING to FAILED.
org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: 
Connection unexpectedly closed by remote task manager 
'10.1.0.4/10.1.0.4:38905'. This might indicate that the remote task manager was 
lost.
        at 
org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.channelInactive(CreditBasedPartitionRequestClientHandler.java:144)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.runtime.io.network.netty.NettyMessageClientDecoderDelegate.channelInactive(NettyMessageClientDecoderDelegate.java:97)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelInputClosed(ByteToMessageDecoder.java:393)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelInactive(ByteToMessageDecoder.java:358)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.handler.ssl.SslHandler.channelInactive(SslHandler.java:1075)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1416)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:912)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:816)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:416)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:331)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:918)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at 
org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
 ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
        at java.lang.Thread.run(Thread.java:748) [?:1.8.0_265]
{noformat}


> End-to-end test sometimes fails with PartitionConnectionException
> -----------------------------------------------------------------
>
>                 Key: FLINK-19426
>                 URL: https://issues.apache.org/jira/browse/FLINK-19426
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Network, Tests
>    Affects Versions: 1.12.0
>            Reporter: Dian Fu
>            Priority: Major
>              Labels: test-stability
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=6983&view=logs&j=68a897ab-3047-5660-245a-cce8f83859f6&t=16ca2cca-2f63-5cce-12d2-d519b930a729
> {code}
> 2020-09-26T22:16:26.9856525Z 
> org.apache.flink.runtime.io.network.partition.consumer.PartitionConnectionException:
>  Connection for partition 
> 619775973ed0f282e20f9d55d13913ab#0@bc764cd8ddf7a0cff126f51c16239658_0_1 not 
> reachable.
> 2020-09-26T22:16:26.9857848Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:159)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9859168Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.internalRequestPartitions(SingleInputGate.java:336)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9860449Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.requestPartitions(SingleInputGate.java:308)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9861677Z  at 
> org.apache.flink.runtime.taskmanager.InputGateWithMetrics.requestPartitions(InputGateWithMetrics.java:95)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9862861Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.requestPartitions(StreamTask.java:542)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9864018Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.readRecoveredChannelState(StreamTask.java:507)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9865284Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$beforeInvoke$0(StreamTask.java:498)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9866415Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:47)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9867500Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.beforeInvoke(StreamTask.java:492)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9868514Z  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:550)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9869450Z  at 
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:722) 
> [flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9870339Z  at 
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:547) 
> [flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9870869Z  at java.lang.Thread.run(Thread.java:748) 
> [?:1.8.0_265]
> 2020-09-26T22:16:26.9872060Z Caused by: java.io.IOException: 
> java.util.concurrent.ExecutionException: 
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: 
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might 
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9873511Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:85)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9874788Z  at 
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9876084Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9876567Z  ... 12 more
> 2020-09-26T22:16:26.9877477Z Caused by: 
> java.util.concurrent.ExecutionException: 
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: 
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might 
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9878503Z  at 
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) 
> ~[?:1.8.0_265]
> 2020-09-26T22:16:26.9879061Z  at 
> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) 
> ~[?:1.8.0_265]
> 2020-09-26T22:16:26.9880244Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:83)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9884461Z  at 
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9885737Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9886304Z  ... 12 more
> 2020-09-26T22:16:26.9887211Z Caused by: 
> org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: 
> Connecting to remote task manager '/10.1.0.4:38905' has failed. This might 
> indicate that the remote task manager has been lost.
> 2020-09-26T22:16:26.9888456Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:122)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9889704Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:101)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9891028Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.lambda$createPartitionRequestClient$1(PartitionRequestClientFactory.java:78)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9892193Z  at 
> org.apache.flink.runtime.concurrent.FutureUtils.completeFromCallable(FutureUtils.java:87)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9893396Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:78)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9894646Z  at 
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9895718Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9896201Z  ... 12 more
> 2020-09-26T22:16:26.9896424Z Caused by: java.lang.NullPointerException
> 2020-09-26T22:16:26.9897066Z  at 
> org.apache.flink.util.Preconditions.checkNotNull(Preconditions.java:58) 
> ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9898008Z  at 
> org.apache.flink.runtime.io.network.netty.NettyPartitionRequestClient.<init>(NettyPartitionRequestClient.java:73)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9899040Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:116)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9900118Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:101)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9901443Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.lambda$createPartitionRequestClient$1(PartitionRequestClientFactory.java:78)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9902613Z  at 
> org.apache.flink.runtime.concurrent.FutureUtils.completeFromCallable(FutureUtils.java:87)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9904043Z  at 
> org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:78)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9905404Z  at 
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:67)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9906893Z  at 
> org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:156)
>  ~[flink-dist_2.11-1.12-SNAPSHOT.jar:1.12-SNAPSHOT]
> 2020-09-26T22:16:26.9907510Z  ... 12 more
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to