[ 
https://issues.apache.org/jira/browse/FLINK-22067?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17340718#comment-17340718
 ] 

Robert Metzger commented on FLINK-22067:
----------------------------------------

This test also fails on my personal CI (the build is just plain master):
https://dev.azure.com/rmetzger/Flink/_build/results?buildId=9072&view=logs&j=d0dc8a09-802e-543a-1851-c31096d61b33&t=5952d6c2-ad1d-5ad4-5bfc-48bb7f31ebd9

The stack trace is a bit different this time:

{code}
2021-05-07T10:23:58.3619103Z May 07 10:23:58 [ERROR] Tests run: 9, Failures: 0, 
Errors: 1, Skipped: 0, Time elapsed: 12.603 s <<< FAILURE! - in 
org.apache.flink.state.api.RocksDBStateBackendWindowITCase
2021-05-07T10:23:58.3638516Z May 07 10:23:58 [ERROR] 
testApplyEvictorWindowStateReader(org.apache.flink.state.api.RocksDBStateBackendWindowITCase)
  Time elapsed: 2.288 s  <<< ERROR!
2021-05-07T10:23:58.3639625Z May 07 10:23:58 java.lang.RuntimeException: Failed 
to take savepoint
2021-05-07T10:23:58.3640269Z May 07 10:23:58    at 
org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:63)
2021-05-07T10:23:58.3641069Z May 07 10:23:58    at 
org.apache.flink.state.api.SavepointWindowReaderITCase.testApplyEvictorWindowStateReader(SavepointWindowReaderITCase.java:361)
2021-05-07T10:23:58.3641776Z May 07 10:23:58    at 
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2021-05-07T10:23:58.3642383Z May 07 10:23:58    at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2021-05-07T10:23:58.3643071Z May 07 10:23:58    at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2021-05-07T10:23:58.3643696Z May 07 10:23:58    at 
java.lang.reflect.Method.invoke(Method.java:498)
2021-05-07T10:23:58.3644458Z May 07 10:23:58    at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
2021-05-07T10:23:58.3645155Z May 07 10:23:58    at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
2021-05-07T10:23:58.3646757Z May 07 10:23:58    at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
2021-05-07T10:23:58.3663386Z May 07 10:23:58    at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
2021-05-07T10:23:58.3664098Z May 07 10:23:58    at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
2021-05-07T10:23:58.3664765Z May 07 10:23:58    at 
org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45)
2021-05-07T10:23:58.3665593Z May 07 10:23:58    at 
org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
2021-05-07T10:23:58.3666179Z May 07 10:23:58    at 
org.junit.rules.RunRules.evaluate(RunRules.java:20)
2021-05-07T10:23:58.3666762Z May 07 10:23:58    at 
org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
2021-05-07T10:23:58.3667430Z May 07 10:23:58    at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
2021-05-07T10:23:58.3668113Z May 07 10:23:58    at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
2021-05-07T10:23:58.3670532Z May 07 10:23:58    at 
org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
2021-05-07T10:23:58.3671229Z May 07 10:23:58    at 
org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
2021-05-07T10:23:58.3671842Z May 07 10:23:58    at 
org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
2021-05-07T10:23:58.3672455Z May 07 10:23:58    at 
org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
2021-05-07T10:23:58.3673062Z May 07 10:23:58    at 
org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
2021-05-07T10:23:58.3673669Z May 07 10:23:58    at 
org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2021-05-07T10:23:58.3708725Z May 07 10:23:58    at 
org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2021-05-07T10:23:58.3709393Z May 07 10:23:58    at 
org.junit.rules.RunRules.evaluate(RunRules.java:20)
2021-05-07T10:23:58.3709977Z May 07 10:23:58    at 
org.junit.runners.ParentRunner.run(ParentRunner.java:363)
2021-05-07T10:23:58.3710616Z May 07 10:23:58    at 
org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
2021-05-07T10:23:58.3711330Z May 07 10:23:58    at 
org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
2021-05-07T10:23:58.3712051Z May 07 10:23:58    at 
org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
2021-05-07T10:23:58.3712746Z May 07 10:23:58    at 
org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
2021-05-07T10:23:58.3713469Z May 07 10:23:58    at 
org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
2021-05-07T10:23:58.3714414Z May 07 10:23:58    at 
org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
2021-05-07T10:23:58.3715098Z May 07 10:23:58    at 
org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
2021-05-07T10:23:58.3715752Z May 07 10:23:58    at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
2021-05-07T10:23:58.3767009Z May 07 10:23:58 Caused by: 
java.util.concurrent.ExecutionException: 
java.util.concurrent.CompletionException: 
org.apache.flink.runtime.checkpoint.CheckpointException: Checkpoint triggering 
task Window(org.apache.flink.state.api.utils.WaitingWindowAssigner@716e431d, 
EventTimeTrigger, NoOpEvictor, NoOpWindowFunction) -> Sink: Unnamed (1/4) of 
job 7febc061a35ab7a41806df6aaa10f60b has not being executed at the moment. 
Aborting checkpoint. Failure reason: Not all required tasks are currently 
running.
2021-05-07T10:23:58.3768604Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
2021-05-07T10:23:58.3769467Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1928)
2021-05-07T10:23:58.3770158Z May 07 10:23:58    at 
org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:61)
2021-05-07T10:23:58.3770709Z May 07 10:23:58    ... 33 more
2021-05-07T10:23:58.3772641Z May 07 10:23:58 Caused by: 
java.util.concurrent.CompletionException: 
org.apache.flink.runtime.checkpoint.CheckpointException: Checkpoint triggering 
task Window(org.apache.flink.state.api.utils.WaitingWindowAssigner@716e431d, 
EventTimeTrigger, NoOpEvictor, NoOpWindowFunction) -> Sink: Unnamed (1/4) of 
job 7febc061a35ab7a41806df6aaa10f60b has not being executed at the moment. 
Aborting checkpoint. Failure reason: Not all required tasks are currently 
running.
2021-05-07T10:23:58.3774104Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
2021-05-07T10:23:58.3774796Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
2021-05-07T10:23:58.3775493Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:607)
2021-05-07T10:23:58.3776167Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
2021-05-07T10:23:58.3776845Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
2021-05-07T10:23:58.3777530Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
2021-05-07T10:23:58.3778274Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.lambda$null$0(CheckpointCoordinator.java:482)
2021-05-07T10:23:58.3778999Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
2021-05-07T10:23:58.3779695Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
2021-05-07T10:23:58.3780391Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
2021-05-07T10:23:58.3781091Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
2021-05-07T10:23:58.3781905Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator$CheckpointTriggerRequest.completeExceptionally(CheckpointCoordinator.java:2047)
2021-05-07T10:23:58.3782762Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.onTriggerFailure(CheckpointCoordinator.java:853)
2021-05-07T10:23:58.3783585Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.lambda$startTriggeringCheckpoint$7(CheckpointCoordinator.java:608)
2021-05-07T10:23:58.3784349Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836)
2021-05-07T10:23:58.3785023Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:811)
2021-05-07T10:23:58.3786194Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
2021-05-07T10:23:58.3786854Z May 07 10:23:58    at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
2021-05-07T10:23:58.3787447Z May 07 10:23:58    at 
java.util.concurrent.FutureTask.run(FutureTask.java:266)
2021-05-07T10:23:58.3788156Z May 07 10:23:58    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
2021-05-07T10:23:58.3788973Z May 07 10:23:58    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
2021-05-07T10:23:58.3789753Z May 07 10:23:58    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
2021-05-07T10:23:58.3806658Z May 07 10:23:58    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
2021-05-07T10:23:58.3807290Z May 07 10:23:58    at 
java.lang.Thread.run(Thread.java:748)
2021-05-07T10:23:58.3810695Z May 07 10:23:58 Caused by: 
org.apache.flink.runtime.checkpoint.CheckpointException: Checkpoint triggering 
task Window(org.apache.flink.state.api.utils.WaitingWindowAssigner@716e431d, 
EventTimeTrigger, NoOpEvictor, NoOpWindowFunction) -> Sink: Unnamed (1/4) of 
job 7febc061a35ab7a41806df6aaa10f60b has not being executed at the moment. 
Aborting checkpoint. Failure reason: Not all required tasks are currently 
running.
2021-05-07T10:23:58.3823381Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.DefaultCheckpointPlanCalculator.checkTasksStarted(DefaultCheckpointPlanCalculator.java:152)
2021-05-07T10:23:58.3824333Z May 07 10:23:58    at 
org.apache.flink.runtime.checkpoint.DefaultCheckpointPlanCalculator.lambda$calculateCheckpointPlan$1(DefaultCheckpointPlanCalculator.java:114)
2021-05-07T10:23:58.3865445Z May 07 10:23:58    at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
2021-05-07T10:23:58.3866280Z May 07 10:23:58    at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:440)
2021-05-07T10:23:58.3867545Z May 07 10:23:58    at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:208)
2021-05-07T10:23:58.3868716Z May 07 10:23:58    at 
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
2021-05-07T10:23:58.3869590Z May 07 10:23:58    at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
2021-05-07T10:23:58.3870219Z May 07 10:23:58    at 
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
2021-05-07T10:23:58.3876399Z May 07 10:23:58    at 
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
2021-05-07T10:23:58.3877044Z May 07 10:23:58    at 
scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
2021-05-07T10:23:58.3877656Z May 07 10:23:58    at 
akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
2021-05-07T10:23:58.3878283Z May 07 10:23:58    at 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
2021-05-07T10:23:58.3879916Z May 07 10:23:58    at 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
2021-05-07T10:23:58.3880581Z May 07 10:23:58    at 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
2021-05-07T10:23:58.3881176Z May 07 10:23:58    at 
akka.actor.Actor$class.aroundReceive(Actor.scala:517)
2021-05-07T10:23:58.3881769Z May 07 10:23:58    at 
akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
2021-05-07T10:23:58.3882372Z May 07 10:23:58    at 
akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
2021-05-07T10:23:58.3882936Z May 07 10:23:58    at 
akka.actor.ActorCell.invoke(ActorCell.scala:561)
2021-05-07T10:23:58.3883498Z May 07 10:23:58    at 
akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
2021-05-07T10:23:58.3884049Z May 07 10:23:58    at 
akka.dispatch.Mailbox.run(Mailbox.scala:225)
2021-05-07T10:23:58.3884578Z May 07 10:23:58    at 
akka.dispatch.Mailbox.exec(Mailbox.scala:235)
2021-05-07T10:23:58.3885151Z May 07 10:23:58    at 
akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
2021-05-07T10:23:58.3886011Z May 07 10:23:58    at 
akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
2021-05-07T10:23:58.3886660Z May 07 10:23:58    at 
akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
2021-05-07T10:23:58.3887311Z May 07 10:23:58    at 
akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
2021-05-07T10:23:58.3887802Z May 07 10:23:58 
2021-05-07T10:23:59.6186290Z May 07 10:23:59 [INFO] Running 
org.apache.flink.state.api.MemoryStateBackendReaderKeyedStateITCase
{code}

> SavepointWindowReaderITCase.testApplyEvictorWindowStateReader
> -------------------------------------------------------------
>
>                 Key: FLINK-22067
>                 URL: https://issues.apache.org/jira/browse/FLINK-22067
>             Project: Flink
>          Issue Type: Bug
>          Components: API / State Processor
>    Affects Versions: 1.13.0
>            Reporter: Till Rohrmann
>            Priority: Critical
>              Labels: auto-deprioritized-critical, test-stability
>
> The test case 
> {{SavepointWindowReaderITCase.testApplyEvictorWindowStateReader}} failed on 
> AZP with:
> {code}
>       at 
> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1928)
>       at 
> org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:69)
>       ... 33 more
> Caused by: java.util.concurrent.TimeoutException: Invocation of public 
> default java.util.concurrent.CompletableFuture 
> org.apache.flink.runtime.webmonitor.RestfulGateway.triggerSavepoint(org.apache.flink.api.common.JobID,java.lang.String,boolean,org.apache.flink.api.common.time.Time)
>  timed out.
>       at com.sun.proxy.$Proxy32.triggerSavepoint(Unknown Source)
>       at 
> org.apache.flink.runtime.minicluster.MiniCluster.lambda$triggerSavepoint$8(MiniCluster.java:716)
>       at 
> java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
>       at 
> java.util.concurrent.CompletableFuture.uniApplyStage(CompletableFuture.java:628)
>       at 
> java.util.concurrent.CompletableFuture.thenApply(CompletableFuture.java:1996)
>       at 
> org.apache.flink.runtime.minicluster.MiniCluster.runDispatcherCommand(MiniCluster.java:751)
>       at 
> org.apache.flink.runtime.minicluster.MiniCluster.triggerSavepoint(MiniCluster.java:714)
>       at 
> org.apache.flink.client.program.MiniClusterClient.triggerSavepoint(MiniClusterClient.java:101)
>       at 
> org.apache.flink.state.api.utils.SavepointTestBase.triggerSavepoint(SavepointTestBase.java:93)
>       at 
> org.apache.flink.state.api.utils.SavepointTestBase.lambda$takeSavepoint$0(SavepointTestBase.java:68)
>       at 
> java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:966)
>       at 
> java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
>       at 
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
>       at 
> java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1646)
>       at 
> java.util.concurrent.CompletableFuture$AsyncRun.exec(CompletableFuture.java:1632)
>       at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
>       at 
> java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
>       at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
>       at 
> java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
> Caused by: akka.pattern.AskTimeoutException: Ask timed out on 
> [Actor[akka://flink/user/rpc/dispatcher_2#-390276455]] after [10000 ms]. 
> Message of type [org.apache.flink.runtime.rpc.messages.LocalFencedMessage]. A 
> typical reason for `AskTimeoutException` is that the recipient actor didn't 
> send a reply.
>       at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
>       at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
>       at 
> akka.pattern.PromiseActorRef$$anonfun$1.apply$mcV$sp(AskSupport.scala:648)
>       at akka.actor.Scheduler$$anon$4.run(Scheduler.scala:205)
>       at 
> scala.concurrent.Future$InternalCallbackExecutor$.unbatchedExecute(Future.scala:601)
>       at 
> scala.concurrent.BatchingExecutor$class.execute(BatchingExecutor.scala:109)
>       at 
> scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:599)
>       at 
> akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(LightArrayRevolverScheduler.scala:328)
>       at 
> akka.actor.LightArrayRevolverScheduler$$anon$4.executeBucket$1(LightArrayRevolverScheduler.scala:279)
>       at 
> akka.actor.LightArrayRevolverScheduler$$anon$4.nextTick(LightArrayRevolverScheduler.scala:283)
>       at 
> akka.actor.LightArrayRevolverScheduler$$anon$4.run(LightArrayRevolverScheduler.scala:235)
>       at java.lang.Thread.run(Thread.java:748)
> {code}
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=15809&view=logs&j=b2f046ab-ae17-5406-acdc-240be7e870e4&t=93e5ae06-d194-513d-ba8d-150ef6da1d7c&l=9197



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to