[ https://issues.apache.org/jira/browse/FLINK-29712?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17621486#comment-17621486 ]
macdoor615 commented on FLINK-29712: ------------------------------------ job parameters {code:java} SET table.dml-sync = true; SET execution.runtime-mode = batch; SET parallelism.default = -1; SET table.exec.hive.infer-source-parallelism = true; SET table.exec.hive.infer-source-parallelism.max = 16; SET taskmanager.network.memory.buffers-per-channel = 0; SET jobmanager.adaptive-batch-scheduler.avg-data-volume-per-task = 4kb; SET table.dynamic-table-options.enabled = true; SET taskmanager.network.memory.buffer-debloat.enabled = true; SET table.exec.legacy-cast-behaviour=enabled; {code} > The same batch task works fine in 1.15.2 and 1.16.0-rc1, but fails in > 1.16.0-rc2 > -------------------------------------------------------------------------------- > > Key: FLINK-29712 > URL: https://issues.apache.org/jira/browse/FLINK-29712 > Project: Flink > Issue Type: Bug > Components: Table SQL / Client > Affects Versions: 1.16.0 > Environment: Flink 1.16.0-rc2 > Hive 3.1.3 > Hadoop 3.3.4 > Reporter: macdoor615 > Priority: Blocker > Fix For: 1.16.0 > > Attachments: flink-conf.yaml > > > All my tasks have failed with same error > {code:java} > org.apache.flink.runtime.JobException: Recovery is suppressed by > FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=2, > backoffTimeMS=60000) > at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) > at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getGlobalFailureHandlingResult(ExecutionFailureHandler.java:102) > at > org.apache.flink.runtime.scheduler.DefaultScheduler.handleGlobalFailure(DefaultScheduler.java:299) > at > org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder$LazyInitializedCoordinatorContext.lambda$failJob$0(OperatorCoordinatorHolder.java:635) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRunAsync$4(AkkaRpcActor.java:453) > at > org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:453) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:218) > at > org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) > at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at akka.actor.Actor.aroundReceive(Actor.scala:537) > at akka.actor.Actor.aroundReceive$(Actor.scala:535) > at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) > at akka.actor.ActorCell.invoke(ActorCell.scala:548) > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) > at akka.dispatch.Mailbox.run(Mailbox.scala:231) > at akka.dispatch.Mailbox.exec(Mailbox.scala:243) > at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) > at > java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) > at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) > at > java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) > Caused by: org.apache.flink.util.FlinkException: Global failure triggered by > OperatorCoordinator for 'Source: p_hswtv[4] -> Calc[5]' (operator > 6cdc5bb954874d922eaee11a8e7b5dd5). > at > org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder$LazyInitializedCoordinatorContext.failJob(OperatorCoordinatorHolder.java:617) > at > org.apache.flink.runtime.operators.coordination.RecreateOnResetOperatorCoordinator$QuiesceableContext.failJob(RecreateOnResetOperatorCoordinator.java:237) > at > org.apache.flink.runtime.source.coordinator.SourceCoordinatorContext.failJob(SourceCoordinatorContext.java:360) > at > org.apache.flink.runtime.source.coordinator.SourceCoordinator.start(SourceCoordinator.java:217) > at > org.apache.flink.runtime.operators.coordination.RecreateOnResetOperatorCoordinator$DeferrableCoordinator.applyCall(RecreateOnResetOperatorCoordinator.java:315) > at > org.apache.flink.runtime.operators.coordination.RecreateOnResetOperatorCoordinator.start(RecreateOnResetOperatorCoordinator.java:70) > at > org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder.start(OperatorCoordinatorHolder.java:198) > at > org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:165) > at > org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startAllOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:82) > at > org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:605) > at > org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:1046) > at > org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:963) > at > org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:422) > at > org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:198) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:622) > at > org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:621) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:190) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) > at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > ... 13 more > Caused by: java.lang.NumberFormatException: null > at java.lang.Integer.parseInt(Integer.java:542) > at java.lang.Integer.parseInt(Integer.java:615) > at > org.apache.flink.connectors.hive.HiveSourceFileEnumerator.calculateFilesSizeWithOpenCost(HiveSourceFileEnumerator.java:157) > at > org.apache.flink.connectors.hive.HiveSourceFileEnumerator.setSplitMaxSize(HiveSourceFileEnumerator.java:135) > at > org.apache.flink.connectors.hive.HiveSourceFileEnumerator.createInputSplits(HiveSourceFileEnumerator.java:89) > at > org.apache.flink.connectors.hive.HiveSourceFileEnumerator.enumerateSplits(HiveSourceFileEnumerator.java:67) > at > org.apache.flink.connector.file.src.AbstractFileSource.createEnumerator(AbstractFileSource.java:141) > at > org.apache.flink.connectors.hive.HiveSource.createEnumerator(HiveSource.java:129) > at > org.apache.flink.runtime.source.coordinator.SourceCoordinator.start(SourceCoordinator.java:213) > ... 33 more > > > {code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)