[
https://issues.apache.org/jira/browse/FLINK-16383?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Aljoscha Krettek closed FLINK-16383.
------------------------------------
Assignee: Aljoscha Krettek
Resolution: Fixed
I added a potential fix on master in 2f0c4d4ec3241679465c792c4bb5c2ef0e4a150e.
I also added debug logging in 9d44834199488612fa7c7c8f9b9c641d9785fb95, so if
this occurs again, please re-open the issue and post the log.
> KafkaProducerExactlyOnceITCase.testExactlyOnceRegularSink fails with "The
> producer has already been closed"
> ------------------------------------------------------------------------------------------------------------
>
> Key: FLINK-16383
> URL: https://issues.apache.org/jira/browse/FLINK-16383
> Project: Flink
> Issue Type: Bug
> Components: Connectors / Kafka, Tests
> Reporter: Robert Metzger
> Assignee: Aljoscha Krettek
> Priority: Blocker
> Labels: pull-request-available, test-stability
> Fix For: 1.11.0
>
>
> Logs:
> https://dev.azure.com/rmetzger/Flink/_build/results?buildId=5779&view=logs&j=a54de925-e958-5e24-790a-3a6150eb72d8&t=24e561e9-4c8d-598d-a290-e6acce191345
> {code}
> 2020-03-01T01:06:57.4738418Z 01:06:57,473 [Source: Custom Source -> Map ->
> Sink: Unnamed (1/1)] INFO
> org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaInternalProducer
> [] - Flushing new partitions
> 2020-03-01T01:06:57.4739960Z 01:06:57,473 [FailingIdentityMapper Status
> Printer] INFO
> org.apache.flink.streaming.connectors.kafka.testutils.FailingIdentityMapper
> [] - ============================> Failing mapper 0: count=680,
> totalCount=1000
> 2020-03-01T01:06:57.4909074Z
> org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
> 2020-03-01T01:06:57.4910001Z at
> org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:147)
> 2020-03-01T01:06:57.4911000Z at
> org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:648)
> 2020-03-01T01:06:57.4912078Z at
> org.apache.flink.streaming.util.TestStreamEnvironment.execute(TestStreamEnvironment.java:77)
> 2020-03-01T01:06:57.4913039Z at
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1619)
> 2020-03-01T01:06:57.4914421Z at
> org.apache.flink.test.util.TestUtils.tryExecute(TestUtils.java:35)
> 2020-03-01T01:06:57.4915423Z at
> org.apache.flink.streaming.connectors.kafka.KafkaProducerTestBase.testExactlyOnce(KafkaProducerTestBase.java:370)
> 2020-03-01T01:06:57.4916483Z at
> org.apache.flink.streaming.connectors.kafka.KafkaProducerTestBase.testExactlyOnceRegularSink(KafkaProducerTestBase.java:309)
> 2020-03-01T01:06:57.4917305Z at
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2020-03-01T01:06:57.4917982Z at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2020-03-01T01:06:57.4918769Z at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2020-03-01T01:06:57.4919477Z at
> java.lang.reflect.Method.invoke(Method.java:498)
> 2020-03-01T01:06:57.4920156Z at
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2020-03-01T01:06:57.4920995Z at
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2020-03-01T01:06:57.4921927Z at
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2020-03-01T01:06:57.4922728Z at
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2020-03-01T01:06:57.4923428Z at
> org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2020-03-01T01:06:57.4924048Z at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-03-01T01:06:57.4924779Z at
> org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2020-03-01T01:06:57.4925528Z at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2020-03-01T01:06:57.4926318Z at
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2020-03-01T01:06:57.4927214Z at
> org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2020-03-01T01:06:57.4927872Z at
> org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2020-03-01T01:06:57.4928587Z at
> org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2020-03-01T01:06:57.4929289Z at
> org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2020-03-01T01:06:57.4929943Z at
> org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2020-03-01T01:06:57.4930672Z at
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-03-01T01:06:57.4931512Z at
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-03-01T01:06:57.4932255Z at
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2020-03-01T01:06:57.4932962Z at
> org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2020-03-01T01:06:57.4933741Z at
> org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-03-01T01:06:57.4934344Z at
> org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2020-03-01T01:06:57.4935193Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2020-03-01T01:06:57.4936245Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2020-03-01T01:06:57.4937113Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2020-03-01T01:06:57.4937925Z at
> org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2020-03-01T01:06:57.4938763Z at
> org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2020-03-01T01:06:57.4939656Z at
> org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2020-03-01T01:06:57.4940451Z at
> org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2020-03-01T01:06:57.4941302Z at
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> 2020-03-01T01:06:57.4942240Z Caused by:
> org.apache.flink.runtime.JobException: Recovery is suppressed by
> FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=1,
> backoffTimeMS=0)
> 2020-03-01T01:06:57.4943374Z at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:110)
> 2020-03-01T01:06:57.4944802Z at
> org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:76)
> 2020-03-01T01:06:57.4945836Z at
> org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:190)
> 2020-03-01T01:06:57.4946730Z at
> org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:184)
> 2020-03-01T01:06:57.4947705Z at
> org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:178)
> 2020-03-01T01:06:57.4948647Z at
> org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:505)
> 2020-03-01T01:06:57.4949500Z at
> org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:383)
> 2020-03-01T01:06:57.4950225Z at
> sun.reflect.GeneratedMethodAccessor20.invoke(Unknown Source)
> 2020-03-01T01:06:57.4950917Z at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2020-03-01T01:06:57.4951721Z at
> java.lang.reflect.Method.invoke(Method.java:498)
> 2020-03-01T01:06:57.4952412Z at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:279)
> 2020-03-01T01:06:57.4953238Z at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:194)
> 2020-03-01T01:06:57.4954080Z at
> org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
> 2020-03-01T01:06:57.4955045Z at
> org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
> 2020-03-01T01:06:57.4955760Z at
> akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2020-03-01T01:06:57.4956435Z at
> akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2020-03-01T01:06:57.4957091Z at
> scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2020-03-01T01:06:57.4957800Z at
> akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2020-03-01T01:06:57.4958491Z at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2020-03-01T01:06:57.4959183Z at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-03-01T01:06:57.4959872Z at
> scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-03-01T01:06:57.4960521Z at
> akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2020-03-01T01:06:57.4961227Z at
> akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2020-03-01T01:06:57.4961875Z at
> akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2020-03-01T01:06:57.4962453Z at
> akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2020-03-01T01:06:57.4963028Z at
> akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2020-03-01T01:06:57.4963601Z at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2020-03-01T01:06:57.4964151Z at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2020-03-01T01:06:57.4965046Z at
> akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2020-03-01T01:06:57.4965802Z at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2020-03-01T01:06:57.4966510Z at
> akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2020-03-01T01:06:57.4967258Z at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 2020-03-01T01:06:57.4967954Z Caused by: java.lang.RuntimeException: Error
> while confirming checkpoint
> 2020-03-01T01:06:57.4968749Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.notifyCheckpointComplete(StreamTask.java:899)
> 2020-03-01T01:06:57.4969694Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointCompleteAsync$7(StreamTask.java:873)
> 2020-03-01T01:06:57.4970599Z at
> org.apache.flink.util.function.FunctionUtils.lambda$asCallable$5(FunctionUtils.java:125)
> 2020-03-01T01:06:57.4971407Z at
> java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 2020-03-01T01:06:57.4972310Z at
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$SynchronizedStreamTaskActionExecutor.run(StreamTaskActionExecutor.java:85)
> 2020-03-01T01:06:57.4973437Z at
> org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:78)
> 2020-03-01T01:06:57.4974275Z at
> org.apache.flink.streaming.runtime.tasks.mailbox.MailboxExecutorImpl.tryYield(MailboxExecutorImpl.java:79)
> 2020-03-01T01:06:57.4975421Z at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.quiesceTimeServiceAndCloseOperator(StreamOperatorWrapper.java:138)
> 2020-03-01T01:06:57.4976419Z at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.close(StreamOperatorWrapper.java:113)
> 2020-03-01T01:06:57.4977340Z at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.close(StreamOperatorWrapper.java:117)
> 2020-03-01T01:06:57.4978239Z at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.close(StreamOperatorWrapper.java:117)
> 2020-03-01T01:06:57.4979154Z at
> org.apache.flink.streaming.runtime.tasks.StreamOperatorWrapper.close(StreamOperatorWrapper.java:78)
> 2020-03-01T01:06:57.4980051Z at
> org.apache.flink.streaming.runtime.tasks.OperatorChain.closeOperators(OperatorChain.java:305)
> 2020-03-01T01:06:57.4980894Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.afterInvoke(StreamTask.java:503)
> 2020-03-01T01:06:57.4981770Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:482)
> 2020-03-01T01:06:57.4982490Z at
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:717)
> 2020-03-01T01:06:57.4983157Z at
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:541)
> 2020-03-01T01:06:57.4983716Z at java.lang.Thread.run(Thread.java:748)
> 2020-03-01T01:06:57.4984462Z Caused by:
> org.apache.flink.util.FlinkRuntimeException: Committing one of transactions
> failed, logging first encountered failure
> 2020-03-01T01:06:57.4985606Z at
> org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction.notifyCheckpointComplete(TwoPhaseCommitSinkFunction.java:302)
> 2020-03-01T01:06:57.4986723Z at
> org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.notifyCheckpointComplete(AbstractUdfStreamOperator.java:130)
> 2020-03-01T01:06:57.4987718Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$notifyCheckpointComplete$8(StreamTask.java:884)
> 2020-03-01T01:06:57.4988771Z at
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$SynchronizedStreamTaskActionExecutor.call(StreamTaskActionExecutor.java:99)
> 2020-03-01T01:06:57.4989793Z at
> org.apache.flink.streaming.runtime.tasks.StreamTask.notifyCheckpointComplete(StreamTask.java:879)
> 2020-03-01T01:06:57.4990396Z ... 17 more
> 2020-03-01T01:06:57.4990886Z Caused by: java.lang.IllegalStateException: The
> producer has already been closed
> 2020-03-01T01:06:57.4991892Z at
> org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaInternalProducer.ensureNotClosed(FlinkKafkaInternalProducer.java:251)
> 2020-03-01T01:06:57.4993008Z at
> org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaInternalProducer.commitTransaction(FlinkKafkaInternalProducer.java:102)
> 2020-03-01T01:06:57.4994228Z at
> org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.commit(FlinkKafkaProducer.java:905)
> 2020-03-01T01:06:57.4995310Z at
> org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.commit(FlinkKafkaProducer.java:97)
> 2020-03-01T01:06:57.4996317Z at
> org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction.notifyCheckpointComplete(TwoPhaseCommitSinkFunction.java:289)
> 2020-03-01T01:06:57.4997044Z ... 21 more
> 2020-03-01T01:06:57.4998645Z 01:06:57,493 [ main] ERROR
> org.apache.flink.streaming.connectors.kafka.KafkaProducerExactlyOnceITCase []
> -
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)