hbgstc123 opened a new issue, #8325:
URL: https://github.com/apache/hudi/issues/8325
**Describe the problem you faced**
spark read hudi meet this error, happen every time, only happen in new hudi
version 0.12.2, works fine when switch to hudi version 0.10.1
**To Reproduce**
Steps to reproduce the behavior:
1.write hudi table with spark
2.read hudi table with spark
**Expected behavior**
A clear and concise description of what you expected to happen.
**Environment Description**
* Hudi version : 0.12.2 (have error), 0.10.1 (work fine)
* Spark version : 3.1
* Storage (HDFS/S3/GCS..) :HDFS
* Running on Docker? (yes/no) :no
**Additional context**
**Stacktrace**
```
at
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:169)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:525)
at
org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:453)
at
org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:452)
at
org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:496)
at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:132)
at
org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:746)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.window.WindowExec.doExecute(WindowExec.scala:121)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:525)
at
org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:453)
at
org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:452)
at
org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:496)
at
org.apache.spark.sql.execution.FilterExec.inputRDDs(basicPhysicalOperators.scala:149)
at
org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:50)
at
org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:746)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:123)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:123)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:157)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:155)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:172)
at
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 170 more
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
class org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:91)
at
org.apache.hudi.common.bootstrap.index.BootstrapIndex.getBootstrapIndex(BootstrapIndex.java:163)
at
org.apache.hudi.common.table.view.AbstractTableFileSystemView.init(AbstractTableFileSystemView.java:109)
at
org.apache.hudi.common.table.view.HoodieTableFileSystemView.init(HoodieTableFileSystemView.java:108)
at
org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:102)
at
org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:93)
at
org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:166)
at
org.apache.hudi.BaseHoodieTableFileIndex.loadFileSlicesForPartitions(BaseHoodieTableFileIndex.java:246)
at
org.apache.hudi.BaseHoodieTableFileIndex.ensurePreloadedPartitions(BaseHoodieTableFileIndex.java:234)
at
org.apache.hudi.BaseHoodieTableFileIndex.getInputFileSlices(BaseHoodieTableFileIndex.java:220)
at org.apache.hudi.HoodieFileIndex.listFiles(HoodieFileIndex.scala:137)
at
org.apache.spark.sql.execution.FileSourceScanExec.selectedPartitions$lzycompute(DataSourceScanExec.scala:215)
at
org.apache.spark.sql.execution.FileSourceScanExec.selectedPartitions(DataSourceScanExec.scala:210)
at
org.apache.spark.sql.execution.FileSourceScanExec.dynamicallySelectedPartitions$lzycompute(DataSourceScanExec.scala:245)
at
org.apache.spark.sql.execution.FileSourceScanExec.dynamicallySelectedPartitions(DataSourceScanExec.scala:226)
at
org.apache.spark.sql.execution.FileSourceScanExec.inputRDD$lzycompute(DataSourceScanExec.scala:413)
at
org.apache.spark.sql.execution.FileSourceScanExec.inputRDD(DataSourceScanExec.scala:398)
at
org.apache.spark.sql.execution.FileSourceScanExec.doExecuteColumnar(DataSourceScanExec.scala:497)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:208)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at
org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:204)
at
org.apache.spark.sql.execution.InputAdapter.doExecuteColumnar(WholeStageCodegenExec.scala:519)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:208)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at
org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:204)
at
org.apache.spark.sql.execution.ColumnarToRowExec.inputRDDs(Columnar.scala:202)
at
org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:746)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.FilterExec.doExecute(basicPhysicalOperators.scala:245)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:92)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:123)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:123)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:157)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:155)
at
org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:172)
at
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 211 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:89)
... 264 more
Caused by: org.apache.hudi.exception.HoodieIOException: Filesystem closed
at
org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.<init>(HFileBootstrapIndex.java:106)
... 269 more
Caused by: java.io.IOException: Filesystem closed
at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:475)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1685)
at
org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1591)
at
org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1588)
at
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at
org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1603)
at
org.apache.hadoop.hdfs.ForwardDistributedFileSystem.getFileStatus(ForwardDistributedFileSystem.java:1197)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1683)
at
org.apache.hudi.common.fs.HoodieWrapperFileSystem.exists(HoodieWrapperFileSystem.java:558)
at
org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.<init>(HFileBootstrapIndex.java:104)
... 269 more
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]