usberkeley commented on PR #12232: URL: https://github.com/apache/hudi/pull/12232#issuecomment-2472667977
@danny0405 Many current PRs are throwing errors. Could there be an issue with the HoodieTableMetadataUtil (1.1.0-SNAPSHOT) that the test-spark integration tests depend on? Similar PRs include: [PR 12242](https://github.com/apache/hudi/pull/12242), [PR 12241](https://github.com/apache/hudi/pull/12241) ``` java // HoodieTableMetadataUtil#translateWriteStatToFileStats buffer is -1 return getFileStatsRangeMetadata(writeStat.getPartitionPath(), getFileNameFromPath(filePath), datasetMetaClient, columnsToIndex, false, -1); } ``` Exception Message: ``` java [Executor task launch worker for task 0.0 in stage 63.0 (TID 153)] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader [] - Got exception when reading log file java.lang.IllegalArgumentException: Buffer size <= 0 at java.io.BufferedInputStream.<init>(BufferedInputStream.java:199) ~[?:?] at org.apache.hadoop.fs.BufferedFSInputStream.<init>(BufferedFSInputStream.java:56) ~[hadoop-client-api-3.3.4.jar:?] at org.apache.hadoop.fs.RawLocalFileSystem.open(RawLocalFileSystem.java:275) ~[hadoop-client-api-3.3.4.jar:?] at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:161) ~[hadoop-client-api-3.3.4.jar:?] at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:372) ~[hadoop-client-api-3.3.4.jar:?] 
at org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem.open(HoodieWrapperFileSystem.java:208) ~[hudi-hadoop-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.hadoop.fs.HadoopFSUtils.getFSDataInputStream(HadoopFSUtils.java:222) ~[hudi-hadoop-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.storage.hadoop.HoodieHadoopStorage.openSeekable(HoodieHadoopStorage.java:155) ~[hudi-hadoop-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.HoodieLogFileReader.getDataInputStream(HoodieLogFileReader.java:475) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.HoodieLogFileReader.<init>(HoodieLogFileReader.java:111) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.HoodieLogFormatReader.<init>(HoodieLogFormatReader.java:62) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternalV1(AbstractHoodieLogRecordReader.java:243) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:221) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner.scan(HoodieUnMergedLogRecordScanner.java:65) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner.scan(HoodieUnMergedLogRecordScanner.java:61) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.metadata.HoodieTableMetadataUtil.getLogFileColumnRangeMetadata(HoodieTableMetadataUtil.java:1298) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.metadata.HoodieTableMetadataUtil.readColumnRangeMetadataFrom(HoodieTableMetadataUtil.java:1263) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.metadata.HoodieTableMetadataUtil.getFileStatsRangeMetadata(HoodieTableMetadataUtil.java:2201) 
~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.metadata.HoodieTableMetadataUtil.translateWriteStatToFileStats(HoodieTableMetadataUtil.java:2334) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertMetadataToPartitionStatsRecords$58(HoodieTableMetadataUtil.java:2249) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) ~[?:?] at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625) ~[?:?] at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509) ~[?:?] at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) ~[?:?] at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921) ~[?:?] at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:?] at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682) ~[?:?] at org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertMetadataToPartitionStatsRecords$c7c33f8a$1(HoodieTableMetadataUtil.java:2250) ~[hudi-common-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160) ~[hudi-spark-client-1.1.0-SNAPSHOT.jar:1.1.0-SNAPSHOT] at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125) ~[spark-core_2.12-3.5.3.jar:3.5.3] at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) ~[scala-library-2.12.18.jar:?] at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) ~[scala-library-2.12.18.jar:?] at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.18.jar:?] 
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1614) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1524) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1588) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1389) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1343) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:379) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54) ~[spark-core_2.12-3.5.3.jar:3.5.3] 
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620) ~[spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) [spark-common-utils_2.12-3.5.3.jar:3.5.3] at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) [spark-common-utils_2.12-3.5.3.jar:3.5.3] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) [spark-core_2.12-3.5.3.jar:3.5.3] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623) [spark-core_2.12-3.5.3.jar:3.5.3] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?] at java.lang.Thread.run(Thread.java:840) [?:?] ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
