[
https://issues.apache.org/jira/browse/HUDI-5555?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sagar Sumit closed HUDI-5555.
-----------------------------
Resolution: Fixed
> Set class loader for parquet data block
> ---------------------------------------
>
> Key: HUDI-5555
> URL: https://issues.apache.org/jira/browse/HUDI-5555
> Project: Apache Hudi
> Issue Type: Bug
> Components: core
> Reporter: Jonathan Vexler
> Assignee: Jonathan Vexler
> Priority: Blocker
> Labels: pull-request-available
> Fix For: 0.13.0
>
>
> HoodieHFileDataBlock calls
> inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader());
> on its inline Hadoop configuration, but HoodieParquetDataBlock does not.
> This causes:
> {code:java}
> java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found
>   at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2667)
>   at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3431)
>   at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3466)
>   at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:174)
>   at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3574)
>   at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3521)
>   at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
>   at org.apache.hadoop.fs.Path.getFileSystem(Path.java:365)
>   at org.apache.parquet.hadoop.ParquetReader$Builder.build(ParquetReader.java:336)
>   at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:168)
>   at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:99)
>   at org.apache.hudi.io.storage.HoodieAvroFileReaderBase.getRecordIterator(HoodieAvroFileReaderBase.java:39)
>   at org.apache.hudi.io.storage.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:53)
>   at org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:162)
>   at org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:128)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:779)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:641)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:691)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:379)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:231)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:220)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:114)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:107)
>   at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:61)
>   at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:49)
>   at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:232)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:528)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.openReaders(HoodieBackedTableMetadata.java:438)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getOrCreateReaders$12(HoodieBackedTableMetadata.java:421)
>   at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.getOrCreateReaders(HoodieBackedTableMetadata.java:421)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$2(HoodieBackedTableMetadata.java:227)
>   at java.util.HashMap.forEach(HashMap.java:1290)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:225)
>   at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:148)
>   at org.apache.hudi.metadata.BaseTableMetadata.fetchAllFilesInPartition(BaseTableMetadata.java:327)
>   at org.apache.hudi.metadata.BaseTableMetadata.getAllFilesInPartition(BaseTableMetadata.java:145)
>   at org.apache.hudi.metadata.HoodieMetadataFileSystemView.listPartition(HoodieMetadataFileSystemView.java:65)
>   at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$ensurePartitionLoadedCorrectly$10(AbstractTableFileSystemView.java:311)
>   at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
>   at org.apache.hudi.common.table.view.AbstractTableFileSystemView.ensurePartitionLoadedCorrectly(AbstractTableFileSystemView.java:302)
>   at org.apache.hudi.common.table.view.AbstractTableFileSystemView.getLatestBaseFiles(AbstractTableFileSystemView.java:515)
>   at org.apache.hudi.hadoop.HoodieROTablePathFilter.accept(HoodieROTablePathFilter.java:200)
>   at org.apache.spark.sql.execution.datasources.PathFilterWrapper.accept(InMemoryFileIndex.scala:165)
>   at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8(HadoopFSUtils.scala:285)
>   at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8$adapted(HadoopFSUtils.scala:285)
>   at scala.collection.TraversableLike.$anonfun$filterImpl$1(TraversableLike.scala:304)
>   at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
>   at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
>   at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303)
>   at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297)
>   at scala.collection.mutable.ArrayOps$ofRef.filterImpl(ArrayOps.scala:198)
>   at scala.collection.TraversableLike.filter(TraversableLike.scala:395)
>   at scala.collection.TraversableLike.filter$(TraversableLike.scala:395)
>   at scala.collection.mutable.ArrayOps$ofRef.filter(ArrayOps.scala:198)
>   at org.apache.spark.util.HadoopFSUtils$.listLeafFiles(HadoopFSUtils.scala:285)
>   at org.apache.spark.util.HadoopFSUtils$.$anonfun$parallelListLeafFilesInternal$1(HadoopFSUtils.scala:95)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
>   at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>   at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>   at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:286)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
>   at scala.collection.AbstractTraversable.map(Traversable.scala:108)
>   at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFilesInternal(HadoopFSUtils.scala:85)
>   at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFiles(HadoopFSUtils.scala:69)
>   at org.apache.spark.sql.execution.datasources.InMemoryFileIndex$.bulkListLeafFiles(InMemoryFileIndex.scala:158)
>   at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.listLeafFiles(InMemoryFileIndex.scala:131)
>   at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.refresh0(InMemoryFileIndex.scala:94)
>   at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.<init>(InMemoryFileIndex.scala:66)
>   at org.apache.spark.sql.execution.datasources.DataSource.createInMemoryFileIndex(DataSource.scala:565)
>   at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:409)
>   at org.apache.hudi.BaseFileOnlyRelation.toHadoopFsRelation(BaseFileOnlyRelation.scala:203)
>   at org.apache.hudi.DefaultSource$.resolveBaseFileOnlyRelation(DefaultSource.scala:277)
>   at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:241)
>   at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:115)
>   at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:72)
>   at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350)
>   at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274)
>   at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245)
>   at scala.Option.getOrElse(Option.scala:189)
>   at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245)
>   at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188)
>   at org.apache.hudi.integ.testsuite.dag.nodes.ValidateDatasetNode.getDatasetToValidate(ValidateDatasetNode.java:56)
>   at org.apache.hudi.integ.testsuite.dag.nodes.BaseValidateDatasetNode.execute(BaseValidateDatasetNode.java:116)
>   at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:135)
>   at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:104)
>   at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:750)
> Caused by: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found
>   at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2571)
>   at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2665)
> {code}
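>
> A minimal sketch of the corresponding fix, assuming HoodieParquetDataBlock builds an inline Hadoop Configuration for its reader the same way HoodieHFileDataBlock does (variable names here are illustrative, not the actual fields of the class):
> {code:java}
> // Hypothetical excerpt from HoodieParquetDataBlock's block-read path.
> // Mirror HoodieHFileDataBlock: set the class loader on the inline
> // Configuration before handing it to the parquet reader, so that Hadoop's
> // Configuration.getClass() can resolve InLineFileSystem.
> Configuration inlineConf = new Configuration(hadoopConf);
> inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader());
> // ... pass inlineConf to ParquetReader as before ...
> {code}
> Without the setClassLoader call, Configuration.getClass() falls back to a class loader that cannot see the Hudi classes, which is presumably why the ClassNotFoundException above surfaces only on the parquet data block path.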
--
This message was sent by Atlassian Jira
(v8.20.10#820010)