[
https://issues.apache.org/jira/browse/FLINK-22548?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17338903#comment-17338903
]
Piotr Nowojski commented on FLINK-22548:
----------------------------------------
This is just a TRACE logging issue. There was indeed a potential illegal access
to buffer when printing some TRACE level logs, so it shouldn't affect any
users. And it wouldn't be causing any data corruptions even user enabled TRACE
logs. (fix is also trivial an will be merged soon)
> UnalignedCheckpointRescaleITCase.shouldRescaleUnalignedCheckpoint fail due to
> IllegalReferenceCountException
> ------------------------------------------------------------------------------------------------------------
>
> Key: FLINK-22548
> URL: https://issues.apache.org/jira/browse/FLINK-22548
> Project: Flink
> Issue Type: Bug
> Components: Runtime / Checkpointing
> Affects Versions: 1.13.0, 1.14.0
> Reporter: Guowei Ma
> Assignee: Piotr Nowojski
> Priority: Critical
> Labels: test-stability
> Fix For: 1.14.0, 1.13.1
>
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=17499&view=logs&j=34f41360-6c0d-54d3-11a1-0292a2def1d9&t=2d56e022-1ace-542f-bf1a-b37dd63243f2&l=9615
> {code:java}
> 2021-05-02T23:18:10.5449890Z May 02 23:18:10 [ERROR]
> shouldRescaleUnalignedCheckpoint[upscale pipeline from 1 to
> 2](org.apache.flink.test.checkpointing.UnalignedCheckpointRescaleITCase)
> Time elapsed: 2.645 s <<< ERROR!
> 2021-05-02T23:18:10.5451234Z May 02 23:18:10
> org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
> 2021-05-02T23:18:10.5452049Z May 02 23:18:10 at
> org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
> 2021-05-02T23:18:10.5453118Z May 02 23:18:10 at
> org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase.execute(UnalignedCheckpointTestBase.java:167)
> 2021-05-02T23:18:10.5454315Z May 02 23:18:10 at
> org.apache.flink.test.checkpointing.UnalignedCheckpointRescaleITCase.shouldRescaleUnalignedCheckpoint(UnalignedCheckpointRescaleITCase.java:368)
> 2021-05-02T23:18:10.5455228Z May 02 23:18:10 at
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2021-05-02T23:18:10.5499169Z May 02 23:18:10 at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2021-05-02T23:18:10.5500258Z May 02 23:18:10 at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2021-05-02T23:18:10.5501465Z May 02 23:18:10 at
> java.lang.reflect.Method.invoke(Method.java:498)
> 2021-05-02T23:18:10.5502547Z May 02 23:18:10 at
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2021-05-02T23:18:10.5503965Z May 02 23:18:10 at
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2021-05-02T23:18:10.5504867Z May 02 23:18:10 at
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2021-05-02T23:18:10.5505729Z May 02 23:18:10 at
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2021-05-02T23:18:10.5506527Z May 02 23:18:10 at
> org.junit.rules.Verifier$1.evaluate(Verifier.java:35)
> 2021-05-02T23:18:10.5507319Z May 02 23:18:10 at
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2021-05-02T23:18:10.5508119Z May 02 23:18:10 at
> org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45)
> 2021-05-02T23:18:10.5508896Z May 02 23:18:10 at
> org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2021-05-02T23:18:10.5509592Z May 02 23:18:10 at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2021-05-02T23:18:10.5510318Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2021-05-02T23:18:10.5517041Z May 02 23:18:10 at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2021-05-02T23:18:10.5518247Z May 02 23:18:10 at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2021-05-02T23:18:10.5520181Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2021-05-02T23:18:10.5520928Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2021-05-02T23:18:10.5521660Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2021-05-02T23:18:10.5522323Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2021-05-02T23:18:10.5523524Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2021-05-02T23:18:10.5524309Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2021-05-02T23:18:10.5524934Z May 02 23:18:10 at
> org.junit.runners.Suite.runChild(Suite.java:128)
> 2021-05-02T23:18:10.5525581Z May 02 23:18:10 at
> org.junit.runners.Suite.runChild(Suite.java:27)
> 2021-05-02T23:18:10.5526293Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2021-05-02T23:18:10.5526966Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2021-05-02T23:18:10.5527688Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2021-05-02T23:18:10.5535712Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2021-05-02T23:18:10.5536510Z May 02 23:18:10 at
> org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2021-05-02T23:18:10.5537216Z May 02 23:18:10 at
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2021-05-02T23:18:10.5538023Z May 02 23:18:10 at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2021-05-02T23:18:10.5538639Z May 02 23:18:10 at
> org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2021-05-02T23:18:10.5539313Z May 02 23:18:10 at
> org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2021-05-02T23:18:10.5540069Z May 02 23:18:10 at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2021-05-02T23:18:10.5540838Z May 02 23:18:10 at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2021-05-02T23:18:10.5541565Z May 02 23:18:10 at
> org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2021-05-02T23:18:10.5543081Z May 02 23:18:10 at
> org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2021-05-02T23:18:10.5544143Z May 02 23:18:10 at
> org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2021-05-02T23:18:10.5544872Z May 02 23:18:10 at
> org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2021-05-02T23:18:10.5545584Z May 02 23:18:10 at
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> 2021-05-02T23:18:10.5546418Z May 02 23:18:10 Caused by:
> org.apache.flink.runtime.JobException: Recovery is suppressed by
> FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=1,
> backoffTimeMS=100)
> 2021-05-02T23:18:10.5547515Z May 02 23:18:10 at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:138)
> 2021-05-02T23:18:10.5548618Z May 02 23:18:10 at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:82)
> 2021-05-02T23:18:10.5549630Z May 02 23:18:10 at
> org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:207)
> 2021-05-02T23:18:10.5550454Z May 02 23:18:10 at
> org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:197)
> 2021-05-02T23:18:10.5551323Z May 02 23:18:10 at
> org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:188)
> 2021-05-02T23:18:10.5552179Z May 02 23:18:10 at
> org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:677)
> 2021-05-02T23:18:10.5553067Z May 02 23:18:10 at
> org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:79)
> 2021-05-02T23:18:10.5553934Z May 02 23:18:10 at
> org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:435)
> 2021-05-02T23:18:10.5554583Z May 02 23:18:10 at
> sun.reflect.GeneratedMethodAccessor26.invoke(Unknown Source)
> 2021-05-02T23:18:10.5555251Z May 02 23:18:10 at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2021-05-02T23:18:10.5556119Z May 02 23:18:10 at
> java.lang.reflect.Method.invoke(Method.java:498)
> 2021-05-02T23:18:10.5556779Z May 02 23:18:10 at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:305)
> 2021-05-02T23:18:10.5557555Z May 02 23:18:10 at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:212)
> 2021-05-02T23:18:10.5558345Z May 02 23:18:10 at
> org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
> 2021-05-02T23:18:10.5559115Z May 02 23:18:10 at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
> 2021-05-02T23:18:10.5559825Z May 02 23:18:10 at
> akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2021-05-02T23:18:10.5560452Z May 02 23:18:10 at
> akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2021-05-02T23:18:10.5561113Z May 02 23:18:10 at
> scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2021-05-02T23:18:10.5561772Z May 02 23:18:10 at
> akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2021-05-02T23:18:10.5562411Z May 02 23:18:10 at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2021-05-02T23:18:10.5563194Z May 02 23:18:10 at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2021-05-02T23:18:10.5563943Z May 02 23:18:10 at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2021-05-02T23:18:10.5564550Z May 02 23:18:10 at
> akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2021-05-02T23:18:10.5565175Z May 02 23:18:10 at
> akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2021-05-02T23:18:10.5565795Z May 02 23:18:10 at
> akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2021-05-02T23:18:10.5566542Z May 02 23:18:10 at
> akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2021-05-02T23:18:10.5567147Z May 02 23:18:10 at
> akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2021-05-02T23:18:10.5567711Z May 02 23:18:10 at
> akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2021-05-02T23:18:10.5568274Z May 02 23:18:10 at
> akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2021-05-02T23:18:10.5568937Z May 02 23:18:10 at
> akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2021-05-02T23:18:10.5569616Z May 02 23:18:10 at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2021-05-02T23:18:10.5570312Z May 02 23:18:10 at
> akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2021-05-02T23:18:10.5570987Z May 02 23:18:10 at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 2021-05-02T23:18:10.5571793Z May 02 23:18:10 Caused by:
> org.apache.flink.shaded.netty4.io.netty.util.IllegalReferenceCountException:
> refCnt: 0, increment: 1
> 2021-05-02T23:18:10.5572863Z May 02 23:18:10 at
> org.apache.flink.shaded.netty4.io.netty.util.internal.ReferenceCountUpdater.retain0(ReferenceCountUpdater.java:123)
> 2021-05-02T23:18:10.5573769Z May 02 23:18:10 at
> org.apache.flink.shaded.netty4.io.netty.util.internal.ReferenceCountUpdater.retain(ReferenceCountUpdater.java:110)
> 2021-05-02T23:18:10.5574789Z May 02 23:18:10 at
> org.apache.flink.shaded.netty4.io.netty.buffer.AbstractReferenceCountedByteBuf.retain(AbstractReferenceCountedByteBuf.java:80)
> 2021-05-02T23:18:10.5575672Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.buffer.NetworkBuffer.retainBuffer(NetworkBuffer.java:166)
> 2021-05-02T23:18:10.5576462Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.buffer.NetworkBuffer.retainBuffer(NetworkBuffer.java:47)
> 2021-05-02T23:18:10.5577241Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.buffer.BufferConsumer.copy(BufferConsumer.java:143)
> 2021-05-02T23:18:10.5578035Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.buffer.BufferConsumer.toDebugString(BufferConsumer.java:202)
> 2021-05-02T23:18:10.5578861Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.logger.NetworkActionsLogger.traceRecover(NetworkActionsLogger.java:94)
> 2021-05-02T23:18:10.5579757Z May 02 23:18:10 at
> org.apache.flink.runtime.io.network.partition.PipelinedSubpartition.addRecovered(PipelinedSubpartition.java:142)
> 2021-05-02T23:18:10.5580850Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionRecoveredStateHandler.recover(RecoveredChannelStateHandler.java:195)
> 2021-05-02T23:18:10.5581946Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionRecoveredStateHandler.recover(RecoveredChannelStateHandler.java:144)
> 2021-05-02T23:18:10.5583033Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.ChannelStateChunkReader.readChunk(SequentialChannelStateReaderImpl.java:207)
> 2021-05-02T23:18:10.5584104Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReaderImpl.readSequentially(SequentialChannelStateReaderImpl.java:107)
> 2021-05-02T23:18:10.5585071Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReaderImpl.read(SequentialChannelStateReaderImpl.java:93)
> 2021-05-02T23:18:10.5586048Z May 02 23:18:10 at
> org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReaderImpl.readOutputData(SequentialChannelStateReaderImpl.java:79)
> 2021-05-02T23:18:10.5586930Z May 02 23:18:10 at
> org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:571)
> 2021-05-02T23:18:10.5587723Z May 02 23:18:10 at
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55)
> 2021-05-02T23:18:10.5588521Z May 02 23:18:10 at
> org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:554)
> 2021-05-02T23:18:10.5589195Z May 02 23:18:10 at
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:756)
> 2021-05-02T23:18:10.5589848Z May 02 23:18:10 at
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:563)
> 2021-05-02T23:18:10.5590546Z May 02 23:18:10 at
> java.lang.Thread.run(Thread.java:748)
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)