[ 
https://issues.apache.org/jira/browse/FLINK-30879?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17692228#comment-17692228
 ] 

Matthias Pohl commented on FLINK-30879:
---------------------------------------

https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=46420&view=logs&j=aa18c3f6-13b8-5f58-86bb-c1cffb239496&t=502fb6c0-30a2-5e49-c5c2-a00fa3acb203&l=36958
A different stacktrace but still caused by a request timeout:
{code}
[...]
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by 
FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=1, 
backoffTimeMS=0)
        at 
org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139)
        at 
org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:83)
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure(DefaultScheduler.java:258)
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:249)
        at 
org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed(DefaultScheduler.java:242)
        at 
org.apache.flink.runtime.scheduler.SchedulerBase.onTaskExecutionStateUpdate(SchedulerBase.java:749)
        at 
org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:726)
        at 
org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:80)
        at 
org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:479)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:309)
        at 
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83)
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:307)
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:222)
        at 
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84)
        at 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168)
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)
        at scala.PartialFunction.applyOrElse(PartialFunction.scala:127)
        at scala.PartialFunction.applyOrElse$(PartialFunction.scala:126)
        at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:175)
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176)
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176)
        at akka.actor.Actor.aroundReceive(Actor.scala:537)
        at akka.actor.Actor.aroundReceive$(Actor.scala:535)
        at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:579)
        at akka.actor.ActorCell.invoke(ActorCell.scala:547)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)
        at akka.dispatch.Mailbox.run(Mailbox.scala:231)
        at akka.dispatch.Mailbox.exec(Mailbox.scala:243)
        ... 4 more
Caused by: org.apache.kafka.common.KafkaException: 
org.apache.kafka.common.KafkaException: Unexpected error in 
InitProducerIdResponse; The request timed out.
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at 
java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:593)
        at 
java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:677)
        at java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:735)
        at 
java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
        at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
        at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
        at 
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
        at 
java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:650)
        at 
org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.abortTransactions(FlinkKafkaProducer.java:1290)
        at 
org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.initializeState(FlinkKafkaProducer.java:1216)
        at 
org.apache.flink.streaming.util.functions.StreamingFunctionUtils.tryRestoreFunction(StreamingFunctionUtils.java:189)
        at 
org.apache.flink.streaming.util.functions.StreamingFunctionUtils.restoreFunctionState(StreamingFunctionUtils.java:171)
        at 
org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.initializeState(AbstractUdfStreamOperator.java:95)
        at 
org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.initializeOperatorState(StreamOperatorStateHandler.java:122)
        at 
org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:274)
        at 
org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:106)
        at 
org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:741)
        at 
org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$SynchronizedStreamTaskActionExecutor.call(StreamTaskActionExecutor.java:100)
        at 
org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:717)
        at 
org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:684)
        at 
org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:952)
        at 
org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:921)
        at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:745)
        at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562)
        at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.kafka.common.KafkaException: Unexpected error in 
InitProducerIdResponse; The request timed out.
        at 
org.apache.kafka.clients.producer.internals.TransactionManager$InitProducerIdHandler.handleResponse(TransactionManager.java:1418)
        at 
org.apache.kafka.clients.producer.internals.TransactionManager$TxnRequestHandler.onComplete(TransactionManager.java:1322)
        at 
org.apache.kafka.clients.ClientResponse.onComplete(ClientResponse.java:109)
        at 
org.apache.kafka.clients.NetworkClient.completeResponses(NetworkClient.java:583)
        at org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:575)
        at 
org.apache.kafka.clients.producer.internals.Sender.maybeSendAndPollTransactionalRequest(Sender.java:418)
        at 
org.apache.kafka.clients.producer.internals.Sender.runOnce(Sender.java:316)
        at 
org.apache.kafka.clients.producer.internals.Sender.run(Sender.java:243)
        ... 1 more
{code}

> KafkaShuffleITCase failed with multiple test failures due to a 
> TimeoutException while creating topics
> -----------------------------------------------------------------------------------------------------
>
>                 Key: FLINK-30879
>                 URL: https://issues.apache.org/jira/browse/FLINK-30879
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Connectors / Kafka
>    Affects Versions: 1.17.0
>            Reporter: Matthias Pohl
>            Priority: Critical
>              Labels: test-stability
>
> The test topic creation failed for multiple tests in 
> {{{}KafkaShuffleITCase{}}}.
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=45586&view=logs&j=fa307d6d-91b1-5ab6-d460-ef50f552b1fe&t=21eae189-b04c-5c04-662b-17dc80ffc83a&l=36821
> {code:java}
> Feb 02 02:45:42 java.lang.AssertionError: Create test topic : 
> test_assigned_to_partition-c7f57154-bf5c-4270-9d26-8f54394573b7_IngestionTime 
> failed, org.apache.kafka.common.errors.TimeoutException: Timed out waiting 
> for a node assignment. Call: createTopics
> Feb 02 02:45:42       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl.createTestTopic(KafkaTestEnvironmentImpl.java:199)
> Feb 02 02:45:42       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironment.createTestTopic(KafkaTestEnvironment.java:97)
> Feb 02 02:45:42       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestBase.createTestTopic(KafkaTestBase.java:213)
> Feb 02 02:45:42       at 
> org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase.testAssignedToPartition(KafkaShuffleITCase.java:295)
> Feb 02 02:45:42       at 
> org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase.testAssignedToPartitionIngestionTime(KafkaShuffleITCase.java:115)
> Feb 02 02:45:42       at 
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> Feb 02 02:45:42       at 
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> Feb 02 02:45:42       at 
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> Feb 02 02:45:42       at 
> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> Feb 02 02:45:42       at 
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
> Feb 02 02:45:42       at 
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> Feb 02 02:45:42       at 
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
> Feb 02 02:45:42       at 
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> Feb 02 02:45:42       at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> Feb 02 02:45:42       at 
> org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:299)
> Feb 02 02:45:42       at 
> org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:293)
> Feb 02 02:45:42       at 
> java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
> Feb 02 02:45:42       at java.base/java.lang.Thread.run(Thread.java:829) 
> {code}
> The original cause might be a {{{}TimeoutException{}}}:
> {code:java}
> java.util.concurrent.ExecutionException: 
> org.apache.kafka.common.errors.TimeoutException: Timed out waiting for a node 
> assignment. Call: createTopics
>       at 
> java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:395)
>       at 
> java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1999)
>       at 
> org.apache.kafka.common.internals.KafkaFutureImpl.get(KafkaFutureImpl.java:165)
>       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl.createTestTopic(KafkaTestEnvironmentImpl.java:175)
>       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironment.createTestTopic(KafkaTestEnvironment.java:97)
>       at 
> org.apache.flink.streaming.connectors.kafka.KafkaTestBase.createTestTopic(KafkaTestBase.java:213)
>       at 
> org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase.testAssignedToPartition(KafkaShuffleITCase.java:295)
>       at 
> org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase.testAssignedToPartitionIngestionTime(KafkaShuffleITCase.java:115)
>  {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to