[
https://issues.apache.org/jira/browse/FLINK-23077?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jingsong Lee closed FLINK-23077.
--------------------------------
Resolution: Invalid
The failure was caused by an OOM.
The state backend should be RocksDB.
> Running Nexmark q5 on 1.13.1 with pipeline.object-reuse=true, the TaskManager
> is killed and a failover occurs.
> ------------------------------------------------------------------------------------------------------------------
>
> Key: FLINK-23077
> URL: https://issues.apache.org/jira/browse/FLINK-23077
> Project: Flink
> Issue Type: Bug
> Components: Table SQL / Runtime
> Affects Versions: 1.13.0, 1.13.1
> Reporter: xiaojin.wy
> Priority: Major
>
> Running Nexmark with Flink 1.13.0 and 1.13.1, q5 cannot succeed.
> *The conf is:*
> pipeline.object-reuse=true
> *The sql is:*
> CREATE TABLE discard_sink (
> auction BIGINT,
> num BIGINT
> ) WITH (
> 'connector' = 'blackhole'
> );
> INSERT INTO discard_sink
> SELECT AuctionBids.auction, AuctionBids.num
> FROM (
> SELECT
> B1.auction,
> count(*) AS num,
> HOP_START(B1.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) AS
> starttime,
> HOP_END(B1.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) AS
> endtime
> FROM bid B1
> GROUP BY
> B1.auction,
> HOP(B1.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND)
> ) AS AuctionBids
> JOIN (
> SELECT
> max(CountBids.num) AS maxn,
> CountBids.starttime,
> CountBids.endtime
> FROM (
> SELECT
> count(*) AS num,
> HOP_START(B2.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) AS
> starttime,
> HOP_END(B2.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) AS
> endtime
> FROM bid B2
> GROUP BY
> B2.auction,
> HOP(B2.dateTime, INTERVAL '2' SECOND, INTERVAL '10' SECOND)
> ) AS CountBids
> GROUP BY CountBids.starttime, CountBids.endtime
> ) AS MaxBids
> ON AuctionBids.starttime = MaxBids.starttime AND
> AuctionBids.endtime = MaxBids.endtime AND
> AuctionBids.num >= MaxBids.maxn;
> *The error is:*
> 2021-06-21 15:00:19,992 INFO
> org.apache.flink.runtime.executiongraph.ExecutionGraph [] - Job
> insert-into_default_catalog.default_database.discard_sink
> (676beebce60930ac033522b4367806b0) switched from state FAILING to FAILED.
> org.apache.flink.runtime.JobException: Recovery is suppressed by
> NoRestartBackoffTimeStrategy
> at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:138)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:82)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:207)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:197)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:188)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:677)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.UpdateSchedulerNgOnInternalFailuresListener.notifyTaskFailure(UpdateSchedulerNgOnInternalFailuresListener.java:51)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.DefaultExecutionGraph.notifySchedulerNgAboutInternalTaskFailure(DefaultExecutionGraph.java:1462)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.Execution.processFail(Execution.java:1140)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.Execution.processFail(Execution.java:1080)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.Execution.markFailed(Execution.java:911)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.executiongraph.ExecutionVertex.markFailed(ExecutionVertex.java:472)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultExecutionVertexOperations.markFailed(DefaultExecutionVertexOperations.java:41)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskDeploymentFailure(DefaultScheduler.java:498)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.scheduler.DefaultScheduler.lambda$assignResourceOrHandleError$7(DefaultScheduler.java:483)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:822)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:797)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> ~[?:1.8.0_102]
> at
> org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridge$PendingRequest.failRequest(DeclarativeSlotPoolBridge.java:532)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridge.cancelPendingRequests(DeclarativeSlotPoolBridge.java:128)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridge.failPendingRequests(DeclarativeSlotPoolBridge.java:360)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridge.notifyNotEnoughResourcesAvailable(DeclarativeSlotPoolBridge.java:351)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.jobmaster.JobMaster.notifyNotEnoughResourcesAvailable(JobMaster.java:816)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> ~[?:1.8.0_102]
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> ~[?:1.8.0_102]
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_102]
> at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_102]
> at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:301)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:212)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> [flink-dist_2.11-1.13.1.jar:1.13.1]
> Caused by: java.util.concurrent.CompletionException:
> org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException:
> Could not acquire the minimum required resources.
> at
> java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:593)
> ~[?:1.8.0_102]
> at
> java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:577)
> ~[?:1.8.0_102]
> ... 33 more
> Caused by:
> org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException:
> Could not acquire the minimum required resources.
> at
> org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridge.failPendingRequests(DeclarativeSlotPoolBridge.java:356)
> ~[flink-dist_2.11-1.13.1.jar:1.13.1]
> ... 28 more
> 2021-06-21 15:00:19,992 INFO org.apache.fli
> !image-2021-06-22-11-30-58-022.png!
--
This message was sent by Atlassian Jira
(v8.3.4#803005)