[
https://issues.apache.org/jira/browse/FLINK-15320?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated FLINK-15320:
-----------------------------------
Labels: pull-request-available (was: )
> JobManager crashes in standalone mode when cancelling a job whose subtasks'
> status is SCHEDULED
> -------------------------------------------------------------------------------------------------
>
> Key: FLINK-15320
> URL: https://issues.apache.org/jira/browse/FLINK-15320
> Project: Flink
> Issue Type: Bug
> Components: Runtime / Coordination
> Affects Versions: 1.10.0
> Reporter: lining
> Assignee: Zhu Zhu
> Priority: Blocker
> Labels: pull-request-available
> Fix For: 1.10.0
>
>
> Use start-cluster.sh to start a standalone cluster, and then submit the
> TopSpeedWindowing job from the streaming examples with a parallelism of
> 20. Wait for one minute, then cancel the job; the JobManager will crash. The
> exception stack is:
> 2019-12-19 10:12:11,060 ERROR
> org.apache.flink.runtime.util.FatalExitExceptionHandler - FATAL: Thread
> 'flink-akka.actor.default-dispatcher-2' produced an uncaught exception.
> Stopping the process...
> 2019-12-19 10:12:11,060 ERROR
> org.apache.flink.runtime.util.FatalExitExceptionHandler - FATAL: Thread
> 'flink-akka.actor.default-dispatcher-2' produced an uncaught exception.
> Stopping the process...
> java.util.concurrent.CompletionException:
> java.util.concurrent.CompletionException: java.lang.IllegalStateException:
> Could not assign resource
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@583585c6 to
> current execution Attempt #0 (Window(GlobalWindows(), DeltaTrigger,
> TimeEvictor, ComparableAggregator, PassThroughWindowFunction) -> Sink: Print
> to Std. Out (19/20)) @ (unassigned) - [CANCELED]. at
> org.apache.flink.runtime.scheduler.DefaultScheduler.propagateIfNonNull(DefaultScheduler.java:387)
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$deployAll$4(DefaultScheduler.java:372)
> at
> java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:822)
> at
> java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:797)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> at
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> at
> org.apache.flink.runtime.concurrent.FutureUtils$WaitingConjunctFuture.handleCompletedFuture(FutureUtils.java:705)
> at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> at
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> at
> java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SchedulerImpl.lambda$internalAllocateSlot$0(SchedulerImpl.java:170)
> at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> at
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> at
> java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotPoolImpl.tryFulfillSlotRequestOrMakeAvailable(SlotPoolImpl.java:534)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotPoolImpl.releaseSingleSlot(SlotPoolImpl.java:479)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotPoolImpl.releaseSlot(SlotPoolImpl.java:390)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotSharingManager$MultiTaskSlot.release(SlotSharingManager.java:557)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotSharingManager$MultiTaskSlot.releaseChild(SlotSharingManager.java:607)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotSharingManager$MultiTaskSlot.access$700(SlotSharingManager.java:352)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SlotSharingManager$SingleTaskSlot.release(SlotSharingManager.java:716)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SchedulerImpl.releaseSharedSlot(SchedulerImpl.java:552)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SchedulerImpl.cancelSlotRequest(SchedulerImpl.java:184)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SchedulerImpl.returnLogicalSlot(SchedulerImpl.java:195)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot.lambda$returnSlotToOwner$0(SingleLogicalSlot.java:181)
> at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> at
> java.util.concurrent.CompletableFuture.uniWhenCompleteStage(CompletableFuture.java:778)
> at
> java.util.concurrent.CompletableFuture.whenComplete(CompletableFuture.java:2140)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot.returnSlotToOwner(SingleLogicalSlot.java:178)
> at
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot.releaseSlot(SingleLogicalSlot.java:125)
> at
> org.apache.flink.runtime.executiongraph.Execution.releaseAssignedResource(Execution.java:1451)
> at
> org.apache.flink.runtime.executiongraph.Execution.finishCancellation(Execution.java:1170)
> at
> org.apache.flink.runtime.executiongraph.Execution.completeCancelling(Execution.java:1150)
> at
> org.apache.flink.runtime.executiongraph.Execution.completeCancelling(Execution.java:1129)
> at
> org.apache.flink.runtime.executiongraph.Execution.cancelAtomically(Execution.java:1111)
> at
> org.apache.flink.runtime.executiongraph.Execution.cancel(Execution.java:804)
> at
> org.apache.flink.runtime.executiongraph.ExecutionVertex.cancel(ExecutionVertex.java:729)
> at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
> at
> java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
> at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481) at
> java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
> at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
> at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) at
> java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499) at
> org.apache.flink.runtime.executiongraph.ExecutionJobVertex.mapExecutionVertices(ExecutionJobVertex.java:505)
> at
> org.apache.flink.runtime.executiongraph.ExecutionJobVertex.cancelWithFuture(ExecutionJobVertex.java:494)
> at
> org.apache.flink.runtime.executiongraph.ExecutionGraph.cancelVerticesAsync(ExecutionGraph.java:952)
> at
> org.apache.flink.runtime.executiongraph.ExecutionGraph.cancel(ExecutionGraph.java:903)
> at
> org.apache.flink.runtime.scheduler.SchedulerBase.cancel(SchedulerBase.java:432)
> at org.apache.flink.runtime.jobmaster.JobMaster.cancel(JobMaster.java:364)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498) at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:279)
> at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:194)
> at
> org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
> at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
> at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26) at
> akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21) at
> scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123) at
> akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21) at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170) at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) at
> akka.actor.Actor$class.aroundReceive(Actor.scala:517) at
> akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225) at
> akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) at
> akka.actor.ActorCell.invoke(ActorCell.scala:561) at
> akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) at
> akka.dispatch.Mailbox.run(Mailbox.scala:225) at
> akka.dispatch.Mailbox.exec(Mailbox.scala:235) at
> akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> Caused by: java.util.concurrent.CompletionException:
> java.lang.IllegalStateException: Could not assign resource
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@583585c6 to
> current execution Attempt #0 (Window(GlobalWindows(), DeltaTrigger,
> TimeEvictor, ComparableAggregator, PassThroughWindowFunction) -> Sink: Print
> to Std. Out (19/20)) @ (unassigned) - [CANCELED]. at
> java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
> at
> java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
> at
> java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:824)
> at
> java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:797)
> ... 69 more
> Caused by: java.lang.IllegalStateException: Could not assign
> resource
> org.apache.flink.runtime.jobmaster.slotpool.SingleLogicalSlot@583585c6 to
> current execution Attempt #0 (Window(GlobalWindows(), DeltaTrigger,
> TimeEvictor, ComparableAggregator, PassThroughWindowFunction) -> Sink: Print
> to Std. Out (19/20)) @ (unassigned) - [CANCELED]. at
> org.apache.flink.runtime.executiongraph.ExecutionVertex.tryAssignResource(ExecutionVertex.java:701)
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$assignResourceOrHandleError$5(DefaultScheduler.java:409)
> at
> java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:822)
> ... 70 more
> 2019-12-19 10:12:11,066 INFO
> org.apache.flink.runtime.blob.BlobServer - Stopped BLOB
> server at 0.0.0.0:54944
--
This message was sent by Atlassian Jira
(v8.3.4#803005)