Jonathan Vexler created HUDI-9800:
-------------------------------------

             Summary: File does not exist when we stop suppressing column 
metadata read failure
                 Key: HUDI-9800
                 URL: https://issues.apache.org/jira/browse/HUDI-9800
             Project: Apache Hudi
          Issue Type: Bug
          Components: metadata
            Reporter: Jonathan Vexler
             Fix For: 1.1.0


[https://github.com/apache/hudi/blob/6ea636078ce1f48e48522185a3e0dba1d22753aa/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java#L1724]

 

In this catch block, instead of returning a list, throw an exception. Then run the 
test 
`org.apache.hudi.functional.TestColumnStatsIndex#testMetadataColumnStatsIndexInitializationWithRollbacks`.

It will fail with:
{code:java}
org.apache.hudi.exception.HoodieException: Failed to instantiate Metadata table 
        at 
org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:381)
     at 
org.apache.hudi.client.SparkRDDWriteClient.initMetadataTable(SparkRDDWriteClient.java:343)
   at 
org.apache.hudi.client.BaseHoodieWriteClient.lambda$doInitTable$9(BaseHoodieWriteClient.java:1345)
   at 
org.apache.hudi.client.BaseHoodieClient.executeUsingTxnManager(BaseHoodieClient.java:307)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.doInitTable(BaseHoodieWriteClient.java:1342)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1383)
      at 
org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:201) 
     at 
org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:205)   at 
org.apache.hudi.HoodieSparkSqlWriterInternal.liftedTree1$1(HoodieSparkSqlWriter.scala:529)
   at 
org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:527)
   at 
org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:195)
        at 
org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:213)
   at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:132)  at 
org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:171)        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
    at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
       at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
 at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
   at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
   at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
       at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
 at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
 at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)  
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
    at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
        at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
       at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
       at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
       at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)  
     at 
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
        at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
    at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
       at 
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869)   at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391)  
     at 
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:364) at 
org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243) at 
org.apache.hudi.functional.ColumnStatIndexTestBase.doWriteAndValidateColumnStats(ColumnStatIndexTestBase.scala:124)
  at 
org.apache.hudi.functional.TestColumnStatsIndex.testMetadataColumnStatsIndexInitializationWithRollbacks(TestColumnStatsIndex.scala:441)
      at java.lang.reflect.Method.invoke(Method.java:498)     at 
java.util.Optional.ifPresent(Optional.java:159)      at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)      at 
java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)    at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)      at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)      at 
java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)    at 
java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948) 
     at 
java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:647)  at 
java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)    at 
java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)    at 
java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)    at 
java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)    at 
java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948) 
     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)   
     at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)   
     at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)      
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)     
  at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)  
  at 
java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)  
     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)   
     at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)   
     at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)      
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)     
  at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)  
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)  
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)  
  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)  
  at 
java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)  
     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)   
     at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)   
     at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)      
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)     
  at java.util.ArrayList.forEach(ArrayList.java:1259)     at 
java.util.ArrayList.forEach(ArrayList.java:1259)Caused by: 
org.apache.hudi.exception.HoodieMetadataException: Bootstrap on column_stats 
partition failed for 
/var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/.hoodie/metadata
     at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFromFilesystem(HoodieBackedTableMetadataWriter.java:524)
  at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeIfNeeded(HoodieBackedTableMetadataWriter.java:318)
        at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:218)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:187)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriterTableVersionSix.<init>(HoodieBackedTableMetadataWriterTableVersionSix.java:76)
       at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.<init>(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:95)
     at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.create(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:72)
     at 
org.apache.hudi.metadata.SparkMetadataWriterFactory.create(SparkMetadataWriterFactory.java:37)
       at 
org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:375)
     ... 85 moreCaused by: org.apache.hudi.exception.HoodieInsertException: 
Failed to bulk insert for commit time 20250909122556524010       at 
org.apache.hudi.table.action.deltacommit.SparkBulkInsertPreppedDeltaCommitActionExecutor.execute(SparkBulkInsertPreppedDeltaCommitActionExecutor.java:58)
    at 
org.apache.hudi.table.HoodieSparkMergeOnReadTable.bulkInsertPrepped(HoodieSparkMergeOnReadTable.java:139)
    at 
org.apache.hudi.table.HoodieSparkMergeOnReadTable.bulkInsertPrepped(HoodieSparkMergeOnReadTable.java:88)
     at 
org.apache.hudi.client.SparkRDDWriteClient.bulkInsertPreppedRecords(SparkRDDWriteClient.java:299)
    at 
org.apache.hudi.client.SparkRDDWriteClient.bulkInsertPreppedRecords(SparkRDDWriteClient.java:68)
     at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkInsertAndCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:138)
       at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkInsertAndCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:64)
        at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.commitInternal(HoodieBackedTableMetadataWriter.java:1744)
   at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriterTableVersionSix.bulkCommit(SparkHoodieBackedTableMetadataWriterTableVersionSix.java:166)
        at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFilegroupsAndCommit(HoodieBackedTableMetadataWriter.java:728)
     at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFromFilesystem(HoodieBackedTableMetadataWriter.java:476)
  ... 93 moreCaused by: org.apache.spark.SparkException: Job aborted due to 
stage failure: Task 0 in stage 58.0 failed 1 times, most recent failure: Lost 
task 0.0 in stage 58.0 (TID 76) (macbookpro executor driver): 
org.apache.hudi.exception.HoodieMetadataException: failed to read col range 
metadata      at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1725)
      at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStatsRecords(HoodieTableMetadataUtil.java:1695)
    at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertFilesToColumnStatsRecords$ca97e33f$1(HoodieTableMetadataUtil.java:1327)
       at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:165)
 at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125) 
     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)       at 
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)       at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)       at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)       at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
      at 
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
 at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104) 
 at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)  
 at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)    
 at org.apache.spark.scheduler.Task.run(Task.scala:141)  at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
     at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
   at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)      at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
     at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
     at java.lang.Thread.run(Thread.java:750)Caused by: 
org.apache.hudi.exception.HoodieIOException: Failed to read footer for parquet 
/var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
 at 
org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:116) at 
org.apache.hudi.common.util.ParquetUtils.readColumnStatsFromMetadata(ParquetUtils.java:278)
  at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1712)
      ... 22 moreCaused by: java.io.FileNotFoundException: File 
/var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
 does not exist  at 
org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:779)
 at 
org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:1100)
      at 
org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:769)
   at 
org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:462)  
     at 
org.apache.parquet.hadoop.util.HadoopInputFile.fromPath(HadoopInputFile.java:39)
     at 
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:478)
   at 
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:462)
   at 
org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:113) 
... 24 more
Driver stacktrace:      at 
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2856)
 at 
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2792)
       at 
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2791)
       at 
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)     at 
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)    at 
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)   at 
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2791)  at 
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1247)
      at 
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1247)
      at scala.Option.foreach(Option.scala:407)       at 
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1247)
 at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3060)
 at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2994)
   at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2983)
   at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)      at 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:989)       at 
org.apache.spark.SparkContext.runJob(SparkContext.scala:2393)        at 
org.apache.spark.SparkContext.runJob(SparkContext.scala:2414)        at 
org.apache.spark.SparkContext.runJob(SparkContext.scala:2433)        at 
org.apache.spark.SparkContext.runJob(SparkContext.scala:2458)        at 
org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1049)  at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)  
     at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)  
     at org.apache.spark.rdd.RDD.withScope(RDD.scala:410)    at 
org.apache.spark.rdd.RDD.collect(RDD.scala:1048)     at 
org.apache.spark.api.java.JavaRDDLike.collect(JavaRDDLike.scala:362) at 
org.apache.spark.api.java.JavaRDDLike.collect$(JavaRDDLike.scala:361)        at 
org.apache.spark.api.java.AbstractJavaRDDLike.collect(JavaRDDLike.scala:45)  at 
org.apache.hudi.metadata.SparkHoodieMetadataBulkInsertPartitioner.repartitionRecords(SparkHoodieMetadataBulkInsertPartitioner.java:97)
       at 
org.apache.hudi.metadata.SparkHoodieMetadataBulkInsertPartitioner.repartitionRecords(SparkHoodieMetadataBulkInsertPartitioner.java:42)
       at 
org.apache.hudi.table.action.commit.SparkBulkInsertHelper.bulkInsert(SparkBulkInsertHelper.java:126)
 at 
org.apache.hudi.table.action.commit.SparkBulkInsertHelper.bulkInsert(SparkBulkInsertHelper.java:81)
  at 
org.apache.hudi.table.action.deltacommit.SparkBulkInsertPreppedDeltaCommitActionExecutor.execute(SparkBulkInsertPreppedDeltaCommitActionExecutor.java:52)
    ... 103 moreCaused by: org.apache.hudi.exception.HoodieMetadataException: 
failed to read col range metadata     at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1725)
      at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStatsRecords(HoodieTableMetadataUtil.java:1695)
    at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertFilesToColumnStatsRecords$ca97e33f$1(HoodieTableMetadataUtil.java:1327)
       at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:165)
 at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125) 
     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)       at 
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)       at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)       at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)       at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
      at 
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
 at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104) 
 at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)  
 at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)    
 at org.apache.spark.scheduler.Task.run(Task.scala:141)  at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
     at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
   at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)      at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
     at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
     at java.lang.Thread.run(Thread.java:750)Caused by: 
org.apache.hudi.exception.HoodieIOException: Failed to read footer for parquet 
/var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
 at 
org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:116) at 
org.apache.hudi.common.util.ParquetUtils.readColumnStatsFromMetadata(ParquetUtils.java:278)
  at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1712)
      ... 22 moreCaused by: java.io.FileNotFoundException: File 
/var/folders/d0/l7mfhzl1661byhh3mbyg5fv00000gn/T/junit-2255514671142180534/dataset/9/c8ac78ba-4064-4ad7-801b-06f8a3a91c7f-0_0-41-52_20250909122554904.parquet
 does not exist  at 
org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:779)
 at 
org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:1100)
      at 
org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:769)
   at 
org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:462)  
     at 
org.apache.parquet.hadoop.util.HadoopInputFile.fromPath(HadoopInputFile.java:39)
     at 
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:478)
   at 
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:462)
   at 
org.apache.hudi.common.util.ParquetUtils.readMetadata(ParquetUtils.java:113) 
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to