[ https://issues.apache.org/jira/browse/FLINK-30076?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17635705#comment-17635705 ]
luoyuxia commented on FLINK-30076: ---------------------------------- [~kcz] Thanks for reporting. I try to reproduce your problem, but can't reproduce it in my local env. But I have the feelings that it may be a bug of parquet like something of PARQUET-2078 And in Flink 1.15, we upgrade parquet to 1.12.2. Maybe there still are some other bugs in 1.12.2. Could you please try with Flink 1.14 or downgrade the parquet version to 1.11.1 to build Flink and then to see whether this problem still exist? > hive join mysql error > --------------------- > > Key: FLINK-30076 > URL: https://issues.apache.org/jira/browse/FLINK-30076 > Project: Flink > Issue Type: Bug > Components: Connectors / Hive, Table SQL / Runtime > Affects Versions: 1.16.0, 1.15.2 > Environment: flink:1.15.2 1.16.0 > hive:3.1.0 > mysql:5.7 > Reporter: zck > Priority: Major > > There is no problem when reading hive table parquet alone, but after doing > join with MySQL table, the above error appears, but there is no error when > hive format is ORC. > select * from hive where id in (select id from mysql) error. > > > org.apache.flink.runtime.JobException: Recovery is suppressed by > NoRestartBackoffTimeStrategy at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) > at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:83) > at > org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure(DefaultScheduler.java:256) > at > org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:247) > at > org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed(DefaultScheduler.java:240) > at > org.apache.flink.runtime.scheduler.SchedulerBase.onTaskExecutionStateUpdate(SchedulerBase.java:738) > at > org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:715) > at > org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:78) > at > org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:477) > at sun.reflect.GeneratedMethodAccessor41.invoke(Unknown Source) at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:309) > at > org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:307) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:222) > at > org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84) > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) at > akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) at > scala.PartialFunction.applyOrElse(PartialFunction.scala:123) at > scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) at > akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) at > akka.actor.Actor.aroundReceive(Actor.scala:537) at > akka.actor.Actor.aroundReceive$(Actor.scala:535) at > akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) at > akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) at > akka.actor.ActorCell.invoke(ActorCell.scala:548) at > akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) at > akka.dispatch.Mailbox.run(Mailbox.scala:231) at > akka.dispatch.Mailbox.exec(Mailbox.scala:243) at > java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) at > java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) > at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) at > java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) > Caused by: java.lang.RuntimeException: One or more fetchers have encountered > exception at > org.apache.flink.connector.base.source.reader.fetcher.SplitFetcherManager.checkErrors(SplitFetcherManager.java:225) > at > org.apache.flink.connector.base.source.reader.SourceReaderBase.getNextFetch(SourceReaderBase.java:169) > at > org.apache.flink.connector.base.source.reader.SourceReaderBase.pollNext(SourceReaderBase.java:130) > at > org.apache.flink.streaming.api.operators.SourceOperator.emitNext(SourceOperator.java:385) > at > org.apache.flink.streaming.runtime.io.StreamTaskSourceInput.emitNext(StreamTaskSourceInput.java:68) > at > org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:65) > at > org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:542) > at > org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:231) > at > org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:831) > at > org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:780) > at > org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:935) > at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:914) > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) at > org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) at > java.lang.Thread.run(Thread.java:748) Caused by: java.lang.RuntimeException: > SplitFetcher thread 0 received unexpected exception while polling the records > at > org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:150) > at > org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:105) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ... 1 more Caused by: java.lang.IllegalStateException: All of the offsets in > the split should be found in the file. expected: [2234158329] found: [] at > org.apache.parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:171) > at > org.apache.parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:140) > at > org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:95) > at > org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:60) > at > org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:93) > at > org.apache.flink.connectors.hive.read.HiveMapredSplitReader.<init>(HiveMapredSplitReader.java:116) > at > org.apache.flink.connectors.hive.read.HiveInputFormat$HiveReader.<init>(HiveInputFormat.java:326) > at > org.apache.flink.connectors.hive.read.HiveInputFormat$HiveReader.<init>(HiveInputFormat.java:305) > at > org.apache.flink.connectors.hive.read.HiveInputFormat$HiveMapRedBulkFormat.createReader(HiveInputFormat.java:282) > at > org.apache.flink.connectors.hive.read.HiveInputFormat$HiveMapRedBulkFormat.createReader(HiveInputFormat.java:275) > at > org.apache.flink.connectors.hive.read.HiveInputFormat.createReader(HiveInputFormat.java:110) > at > org.apache.flink.connectors.hive.read.HiveInputFormat.createReader(HiveInputFormat.java:65) > at > org.apache.flink.connector.file.src.impl.FileSourceSplitReader.checkSplitOrStartNext(FileSourceSplitReader.java:112) > at > org.apache.flink.connector.file.src.impl.FileSourceSplitReader.fetch(FileSourceSplitReader.java:65) > at > org.apache.flink.connector.base.source.reader.fetcher.FetchTask.run(FetchTask.java:58) > at > org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:142) -- This message was sent by Atlassian Jira (v8.20.10#820010)