[
https://issues.apache.org/jira/browse/FLINK-26255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yun Gao updated FLINK-26255:
----------------------------
Labels: test-stability (was: )
> SplitAggregateITCase.testAggWithJoin failed on azure
> ----------------------------------------------------
>
> Key: FLINK-26255
> URL: https://issues.apache.org/jira/browse/FLINK-26255
> Project: Flink
> Issue Type: Bug
> Components: Runtime / State Backends
> Affects Versions: 1.15.0
> Reporter: Roman Khachatryan
> Assignee: Roman Khachatryan
> Priority: Blocker
> Labels: test-stability
> Fix For: 1.15.0
>
>
> [https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=31850&view=logs&j=0c940707-2659-5648-cbe6-a1ad63045f0a&t=075c2716-8010-5565-fe08-3c4bb45824a4&l=10497]
> Acknowledgement of a checkpoint failed, then the checkpoint expired, then the
> checkpoint failure threshold was reached and the job failed.
> {code}
> Randomly selected true for execution.checkpointing.unaligned
> Randomly selected PT2S for execution.checkpointing.alignment-timeout
> Randomly selected true for state.backend.changelog.enabled
> Randomly selected PT0.1S for state.backend.changelog.periodic-materialize.interval
> {code}
> {code}
> [ERROR] Tests run: 64, Failures: 0, Errors: 1, Skipped: 0, Time elapsed:
> 700.545 s <<< FAILURE! - in
> org.apache.flink.table.planner.runtime.stream.sql.SplitAggregateITCase
> [ERROR] SplitAggregateITCase.testAggWithJoin Time elapsed: 601.77 s <<<
> ERROR!
> org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
> at
> org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
> at
> org.apache.flink.runtime.minicluster.MiniClusterJobClient.lambda$getJobExecutionResult$3(MiniClusterJobClient.java:141)
> at
> java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
> at
> java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> at
> java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
> at
> org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.lambda$invokeRpc$1(AkkaInvocationHandler.java:259)
> at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> at
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> at
> java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
> at
> org.apache.flink.util.concurrent.FutureUtils.doForward(FutureUtils.java:1389)
> at
> org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$null$1(ClassLoadingUtils.java:93)
> at
> org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)
> at
> org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$guardCompletionWithContextClassLoader$2(ClassLoadingUtils.java:92)
> at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> at
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> at
> java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
> at
> org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$1.onComplete(AkkaFutureUtils.java:47)
> at akka.dispatch.OnComplete.internal(Future.scala:300)
> at akka.dispatch.OnComplete.internal(Future.scala:297)
> at akka.dispatch.japi$CallbackBridge.apply(Future.scala:224)
> at akka.dispatch.japi$CallbackBridge.apply(Future.scala:221)
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:60)
> at
> org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$DirectExecutionContext.execute(AkkaFutureUtils.java:65)
> at
> scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:68)
> at
> scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1(Promise.scala:284)
> at
> scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1$adapted(Promise.scala:284)
> at
> scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:284)
> ...
> Caused by: org.apache.flink.util.FlinkRuntimeException: Exceeded checkpoint
> tolerable failure threshold.
> at
> org.apache.flink.runtime.checkpoint.CheckpointFailureManager.checkFailureAgainstCounter(CheckpointFailureManager.java:160)
> at
> org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleJobLevelCheckpointException(CheckpointFailureManager.java:123)
> at
> org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleCheckpointException(CheckpointFailureManager.java:90)
> at
> org.apache.flink.runtime.checkpoint.CheckpointCoordinator.abortPendingCheckpoint(CheckpointCoordinator.java:2046)
> at
> org.apache.flink.runtime.checkpoint.CheckpointCoordinator.abortPendingCheckpoint(CheckpointCoordinator.java:2025)
> at
> org.apache.flink.runtime.checkpoint.CheckpointCoordinator.access$600(CheckpointCoordinator.java:98)
> at
> org.apache.flink.runtime.checkpoint.CheckpointCoordinator$CheckpointCanceller.run(CheckpointCoordinator.java:2104)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> at
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> {code}
> {code}
> 12:18:11,760 [jobmanager-io-thread-5] WARN
> org.apache.flink.runtime.jobmaster.JobMaster [] - Error while
> processing AcknowledgeCheckpoint message
> java.lang.IllegalStateException: Attempt to reference unknown state:
> 4a798990-1428-424c-813a-2ec1c4fcee8f-KeyGroupRange{startKeyGroup=0,
> endKeyGroup=31}-000019.sst
> at
> org.apache.flink.util.Preconditions.checkState(Preconditions.java:193)
> ~[flink-core-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.state.SharedStateRegistryImpl.registerReference(SharedStateRegistryImpl.java:82)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.state.IncrementalRemoteKeyedStateHandle.registerSharedStates(IncrementalRemoteKeyedStateHandle.java:317)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.state.SharedStateRegistryImpl.registerAll(SharedStateRegistryImpl.java:172)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.state.changelog.ChangelogStateBackendHandle$ChangelogStateBackendHandleImpl.registerSharedStates(ChangelogStateBackendHandle.java:124)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.checkpoint.OperatorSubtaskState.registerSharedState(OperatorSubtaskState.java:229)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.checkpoint.OperatorSubtaskState.registerSharedStates(OperatorSubtaskState.java:219)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.checkpoint.TaskStateSnapshot.registerSharedStates(TaskStateSnapshot.java:189)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveAcknowledgeMessage(CheckpointCoordinator.java:1114)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$acknowledgeCheckpoint$1(ExecutionGraphHandler.java:89)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$processCheckpointCoordinatorMessage$3(ExecutionGraphHandler.java:119)
> ~[flink-runtime-1.15-SNAPSHOT.jar:1.15-SNAPSHOT]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> [?:1.8.0_292]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> [?:1.8.0_292]
> at java.lang.Thread.run(Thread.java:748) [?:1.8.0_292]
> {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)