Hangxiang Yu created FLINK-32601:
------------------------------------
Summary: Unstable
RemoteChannelThroughputBenchmark_remoteRebalance_jmhTest
Key: FLINK-32601
URL: https://issues.apache.org/jira/browse/FLINK-32601
Project: Flink
Issue Type: Bug
Components: Benchmarks
Reporter: Hangxiang Yu
It's an existing exception that occurs intermittently; see workflow runs
[#74|https://github.com/apache/flink-benchmarks/actions/runs/5219158886/jobs/9450453549#logs],
[#40|https://github.com/apache/flink-benchmarks/actions/runs/4915916989/jobs/8779014239],
[#75|https://github.com/apache/flink-benchmarks/actions/runs/5527523559/jobs/10171425495].
The exception stack trace is as follows:
{code:java}
<shutdown timeout of 30 seconds expired, forcing forked VM to exit>
2310ERROR: org.openjdk.jmh.runner.RunnerException: Benchmark caught the
exception
2311Benchmark had encountered error, and fail on error was requested
2312 at org.openjdk.jmh.runner.Runner.runBenchmarks(Runner.java:570)
2313 at org.openjdk.jmh.runner.Runner.internalRun(Runner.java:313)
2314 at org.openjdk.jmh.runner.Runner.run(Runner.java:206)
2315 at org.openjdk.jmh.Main.main(Main.java:71)
2316Caused by: org.openjdk.jmh.runner.BenchmarkException: Benchmark error
during the run
2317 at
org.openjdk.jmh.runner.BenchmarkHandler.runIteration(BenchmarkHandler.java:428)
2318 at org.openjdk.jmh.runner.BaseRunner.runBenchmark(BaseRunner.java:282)
2319 at org.openjdk.jmh.runner.BaseRunner.runBenchmark(BaseRunner.java:234)
2320 at org.openjdk.jmh.runner.BaseRunner.doSingle(BaseRunner.java:139)
2321 at
org.openjdk.jmh.runner.BaseRunner.runBenchmarksForked(BaseRunner.java:76)
2322 at org.openjdk.jmh.runner.ForkedRunner.run(ForkedRunner.java:72)
2323 at org.openjdk.jmh.runner.ForkedMain.main(ForkedMain.java:84)
2324 Suppressed: org.apache.flink.runtime.client.JobExecutionException: Job
execution failed.
2325 at
org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
2326 at
org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:1010)
2327 at
org.apache.flink.benchmark.RemoteChannelThroughputBenchmark.remoteRebalance(RemoteChannelThroughputBenchmark.java:76)
2328 at
org.apache.flink.benchmark.generated.RemoteChannelThroughputBenchmark_remoteRebalance_jmhTest.remoteRebalance_thrpt_jmhStub(RemoteChannelThroughputBenchmark_remoteRebalance_jmhTest.java:123)
2329 at
org.apache.flink.benchmark.generated.RemoteChannelThroughputBenchmark_remoteRebalance_jmhTest.remoteRebalance_Throughput(RemoteChannelThroughputBenchmark_remoteRebalance_jmhTest.java:85)
2330 at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2331 at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2332 at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2333 at java.base/java.lang.reflect.Method.invoke(Method.java:566)
2334 at
org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:453)
2335 at
org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:437)
2336 at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
2337 at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
2338 at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
2339 at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
2340 at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
2341 at java.base/java.lang.Thread.run(Thread.java:829)
2342 Caused by: org.apache.flink.runtime.JobException: Recovery is
suppressed by NoRestartBackoffTimeStrategy
2343 at
org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:176)
2344 at
org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getGlobalFailureHandlingResult(ExecutionFailureHandler.java:126)
2345 at
org.apache.flink.runtime.scheduler.DefaultScheduler.handleGlobalFailure(DefaultScheduler.java:328)
2346 at
org.apache.flink.runtime.scheduler.UpdateSchedulerNgOnInternalFailuresListener.notifyGlobalFailure(UpdateSchedulerNgOnInternalFailuresListener.java:57)
2347 at
org.apache.flink.runtime.executiongraph.DefaultExecutionGraph.failGlobal(DefaultExecutionGraph.java:1073)
2348 at
org.apache.flink.runtime.executiongraph.DefaultExecutionGraph.failGlobalIfExecutionIsStillRunning(DefaultExecutionGraph.java:1061)
2349 at
org.apache.flink.runtime.executiongraph.DefaultExecutionGraph$1.lambda$failJobDueToTaskFailure$1(DefaultExecutionGraph.java:477)
2350 at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRunAsync$4(AkkaRpcActor.java:453)
2351 at
org.apache.flink.runtime.concurrent.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)
2352 at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:453)
2353 at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:218)
2354 at
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84)
2355 at
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168)
2356 at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)
2357 at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)
2358 at scala.PartialFunction.applyOrElse(PartialFunction.scala:127)
2359 at scala.PartialFunction.applyOrElse$(PartialFunction.scala:126)
2360 at
akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)
2361 at
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:175)
2362 at
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176)
2363 at
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176)
2364 at akka.actor.Actor.aroundReceive(Actor.scala:537)
2365 at akka.actor.Actor.aroundReceive$(Actor.scala:535)
2366 at
akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)
2367 at akka.actor.ActorCell.receiveMessage(ActorCell.scala:579)
2368 at akka.actor.ActorCell.invoke(ActorCell.scala:547)
2369 at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)
2370 at akka.dispatch.Mailbox.run(Mailbox.scala:231)
2371 at akka.dispatch.Mailbox.exec(Mailbox.scala:243)
2372 at
java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)
2373 at
java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)
2374 at
java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)
2375 at
java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)
2376 at
java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183)
2377 Caused by: org.apache.flink.util.FlinkRuntimeException: Exceeded
checkpoint tolerable failure threshold. The latest checkpoint failed due to
Asynchronous task checkpoint failed., view the Checkpoint History tab or the
Job Manager log to find out why continuous checkpoints failed.
2378 at
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.checkFailureAgainstCounter(CheckpointFailureManager.java:212)
2379 at
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleTaskLevelCheckpointException(CheckpointFailureManager.java:191)
2380 at
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleCheckpointException(CheckpointFailureManager.java:124)
2381 at
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.abortPendingCheckpoint(CheckpointCoordinator.java:2151)
2382 at
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveDeclineMessage(CheckpointCoordinator.java:1100)
2383 at
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$declineCheckpoint$2(ExecutionGraphHandler.java:103)
2384 at
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$processCheckpointCoordinatorMessage$3(ExecutionGraphHandler.java:119)
2385 at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
2386 at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
2387 at java.base/java.lang.Thread.run(Thread.java:829) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)