Jonathan Vexler created HUDI-9800:
-------------------------------------
Summary: File does not exist when we stop suppressing column metadata read failure
Key: HUDI-9800
URL: https://issues.apache.org/jira/browse/HUDI-9800
Project: Apache Hudi
Issue Type: Bug
Components: metadata
Reporter: Jonathan Vexler
Fix For: 1.1.0
[https://github.com/apache/hudi/blob/6ea636078ce1f48e48522185a3e0dba1d22753aa/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java#L1724]
In this catch block, throw an exception instead of returning a list, then run the test
`org.apache.hudi.functional.TestColumnStatsIndex#testMetadataColumnStatsIndexInitializationWithRollbacks`.
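For illustration, here is a minimal sketch of the experiment, assuming the catch currently swallows the read failure and returns an empty list. The wrapper class, method name, and `Callable` shape are hypothetical; only the `HoodieMetadataException` type and its message are taken from the resulting stack trace:
{code:java}
import java.util.List;
import java.util.concurrent.Callable;

import org.apache.hudi.exception.HoodieMetadataException;

// Hypothetical stand-in for the catch in
// HoodieTableMetadataUtil.readColumnRangeMetadataFrom (linked above).
public class ColumnRangeReadSketch {

  // Wraps a column-range read; per this ticket's experiment, a read
  // failure is rethrown instead of being suppressed.
  public static <T> List<T> readOrThrow(Callable<List<T>> read) {
    try {
      return read.call();
    } catch (Exception e) {
      // Current behavior being removed: swallow the error and return an
      // empty list, e.g. `return Collections.emptyList();`, which hides
      // missing or unreadable base files from column-stats bootstrap.
      throw new HoodieMetadataException("failed to read col range metadata", e);
    }
  }
}
{code}
Surfacing the failure this way exposes column-stats bootstrap attempting to read a base file that no longer exists on disk, as the trace below confirms.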
The test will then fail with:
{code:java}
org.apache.hudi.exception.HoodieException: Failed to instantiate Metadata table
  at org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:381)
  at org.apache.hudi.client.SparkRDDWriteClient.initMetadataTable(SparkRDDWriteClient.java:343)
  at org.apache.hudi.client.BaseHoodieWriteClient.lambda$doInitTable$9(BaseHoodieWriteClient.java:1345)
  at org.apache.hudi.client.BaseHoodieClient.executeUsingTxnManager(BaseHoodieClient.java:307)
  at org.apache.hudi.client.BaseHoodieWriteClient.doInitTable(BaseHoodieWriteClient.java:1342)
  at org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1383)
  at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:201)
  at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:205)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.liftedTree1$1(HoodieSparkSqlWriter.scala:529)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:527)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:195)
  at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:213)
  at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:132)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:171)
  at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
  at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
  at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
  at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
  at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869)
  at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391)
  at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:364)
  at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243)
  at org.apache.hudi.functional.ColumnStatIndexTestBase.doWriteAndValidateColumnStats(ColumnStatIndexTestBase.scala:124)
  at org.apache.hudi.functional.TestColumnStatsIndex.testMetadataColumnStatsIndexInitializationWithRollbacks(TestColumnStatsIndex.scala:441)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at java.util.Optional.ifPresent(Optional.java:159)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
  at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:647)
  at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
  at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
  at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
  at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
  at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
  at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
  at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
  at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
  at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
  at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
  at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
  at java.util.ArrayList.forEach(ArrayList.java:1259)
  at java.util.ArrayList.forEach(ArrayList.java:1259)
Caused by: org.apache.hudi.exception.HoodieMetadataException: Bootstrap on column_stats partition failed for /var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/.hoodie/metadata
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFromFilesystem(HoodieBackedTableMetadataWriter.java:524)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeIfNeeded(HoodieBackedTableMetadataWriter.java:318)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:218)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:187)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriterTableVersionSix.<init>(HoodieBackedTableMetadataWriterTableVersionSix.java:76)
  at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.<init>(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:95)
  at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.create(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:72)
  at org.apache.hudi.metadata.SparkMetadataWriterFactory.create(SparkMetadataWriterFactory.java:37)
  at org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:375)
  ... 85 more
Caused by: org.apache.hudi.exception.HoodieInsertException: Failed to bulk insert for commit time 20250909122556524010
  at org.apache.hudi.table.action.deltacommit.SparkBulkInsertPreppedDeltaCommitActionExecutor.execute(SparkBulkInsertPreppedDeltaCommitActionExecutor.java:58)
  at org.apache.hudi.table.HoodieSparkMergeOnReadTable.bulkInsertPrepped(HoodieSparkMergeOnReadTable.java:139)
  at org.apache.hudi.table.HoodieSparkMergeOnReadTable.bulkInsertPrepped(HoodieSparkMergeOnReadTable.java:88)
  at org.apache.hudi.client.SparkRDDWriteClient.bulkInsertPreppedRecords(SparkRDDWriteClient.java:299)
  at org.apache.hudi.client.SparkRDDWriteClient.bulkInsertPreppedRecords(SparkRDDWriteClient.java:68)
  at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkInsertAndCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:138)
  at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkInsertAndCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:64)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.commitInternal(HoodieBackedTableMetadataWriter.java:1744)
  at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:166)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFilegroupsAndCommit(HoodieBackedTableMetadataWriter.java:728)
  at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFromFilesystem(HoodieBackedTableMetadataWriter.java:476)
  ... 93 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 58.0 failed 1 times, most recent failure: Lost task 0.0 in stage 58.0 (TID 76) (macbookpro executor driver): org.apache.hudi.exception.HoodieMetadataException: failed to read col range metadata
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1725)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStatsRecords(HoodieTableMetadataUtil.java:1695)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertFilesToColumnStatsRecords$ca97e33f$1(HoodieTableMetadataUtil.java:1327)
  at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:165)
  at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
  at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
  at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
  at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
  at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
  at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
  at org.apache.spark.scheduler.Task.run(Task.scala:141)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
  at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
  at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.hudi.exception.HoodieIOException: Failed to read footer for parquet /var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
  at org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:116)
  at org.apache.hudi.common.util.ParquetUtils.readColumnStatsFromMetadata(ParquetUtils.java:278)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1712)
  ... 22 more
Caused by: java.io.FileNotFoundException: File /var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet does not exist
  at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:779)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:1100)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:769)
  at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:462)
  at org.apache.parquet.hadoop.util.HadoopInputFile.fromPath(HadoopInputFile.java:39)
  at org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:478)
  at org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:462)
  at org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:113)
  ... 24 more

Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2856)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2792)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2791)
  at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2791)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1247)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1247)
  at scala.Option.foreach(Option.scala:407)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1247)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3060)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2994)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2983)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:989)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2393)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2414)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2433)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2458)
  at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1049)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:410)
  at org.apache.spark.rdd.RDD.collect(RDD.scala:1048)
  at org.apache.spark.api.java.JavaRDDLike.collect(JavaRDDLike.scala:362)
  at org.apache.spark.api.java.JavaRDDLike.collect$(JavaRDDLike.scala:361)
  at org.apache.spark.api.java.AbstractJavaRDDLike.collect(JavaRDDLike.scala:45)
  at org.apache.hudi.metadata.SparkHoodieMetadataBulkInsertPartitioner.repartitionRecords(SparkHoodieMetadataBulkInsertPartitioner.java:97)
  at org.apache.hudi.metadata.SparkHoodieMetadataBulkInsertPartitioner.repartitionRecords(SparkHoodieMetadataBulkInsertPartitioner.java:42)
  at org.apache.hudi.table.action.commit.SparkBulkInsertHelper.bulkInsert(SparkBulkInsertHelper.java:126)
  at org.apache.hudi.table.action.commit.SparkBulkInsertHelper.bulkInsert(SparkBulkInsertHelper.java:81)
  at org.apache.hudi.table.action.deltacommit.SparkBulkInsertPreppedDeltaCommitActionExecutor.execute(SparkBulkInsertPreppedDeltaCommitActionExecutor.java:52)
  ... 103 more
Caused by: org.apache.hudi.exception.HoodieMetadataException: failed to read col range metadata
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1725)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStatsRecords(HoodieTableMetadataUtil.java:1695)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertFilesToColumnStatsRecords$ca97e33f$1(HoodieTableMetadataUtil.java:1327)
  at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:165)
  at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
  at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
  at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
  at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
  at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
  at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
  at org.apache.spark.scheduler.Task.run(Task.scala:141)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
  at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
  at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.hudi.exception.HoodieIOException: Failed to read footer for parquet /var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
  at org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:116)
  at org.apache.hudi.common.util.ParquetUtils.readColumnStatsFromMetadata(ParquetUtils.java:278)
  at org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1712)
  ... 22 more
Caused by: java.io.FileNotFoundException: File /var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet does not exist
  at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:779)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:1100)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:769)
  at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:462)
  at org.apache.parquet.hadoop.util.HadoopInputFile.fromPath(HadoopInputFile.java:39)
  at org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:478)
  at org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:462)
  at org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:113)
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)