zhaoyudi-creator commented on issue #3336:
URL: https://github.com/apache/parquet-java/issues/3336#issuecomment-3718870309
org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:53)
at org.apache.hudi.common.util.MappingIterator.hasNext(MappingIterator.java:35)
at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:65)
at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:154)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.handleUpdateInternal(HoodieSparkCopyOnWriteTable.java:230)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.handleUpdate(HoodieSparkCopyOnWriteTable.java:221)
at org.apache.hudi.table.action.compact.CompactionExecutionHelper.writeFileAndGetWriteStats(CompactionExecutionHelper.java:64)
at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:248)
at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compactInParallel$6d347a86$1(HoodieCompactor.java:155)
at org.apache.spark.api.java.JavaPairRDD$.$anonfun$toScalaFunction$1(JavaPairRDD.scala:1070)
at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1556)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1466)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1530)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1357)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:140)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:562)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1555)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:565)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 1771814 in block 4 in file 00000002-628c-4456-a893-f9ea6b31122b-0_11164-0-11164_20251221141817486.parquet
at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:264)
at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132)
at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:48)
... 32 more
Caused by: org.apache.parquet.io.ParquetDecodingException: Can't read value in column [created_tm] optional binary created_tm (STRING) at value 267595 out of 716627, 7595 out of 20000 in currentPage. repetition level: 0, definition level: 1
at org.apache.parquet.column.impl.ColumnReaderBase.readValue(ColumnReaderBase.java:553)
at org.apache.parquet.column.impl.ColumnReaderImpl.readValue(ColumnReaderImpl.java:30)
at org.apache.parquet.column.impl.ColumnReaderBase.writeCurrentValueToConverter(ColumnReaderBase.java:439)
at org.apache.parquet.column.impl.ColumnReaderImpl.writeCurrentValueToConverter(ColumnReaderImpl.java:30)
at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:406)
at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:234)
... 34 more
Caused by: org.apache.parquet.io.ParquetDecodingException: could not read bytes at offset 174666
at org.apache.parquet.column.values.plain.BinaryPlainValuesReader.readBytes(BinaryPlainValuesReader.java:42)
at org.apache.parquet.column.impl.ColumnReaderBase$2$6.read(ColumnReaderBase.java:372)
at org.apache.parquet.column.impl.ColumnReaderBase.readValue(ColumnReaderBase.java:533)
... 39 more
Caused by: java.io.EOFException
at org.apache.parquet.bytes.SingleBufferInputStream.slice(SingleBufferInputStream.java:116)
at org.apache.parquet.column.values.plain.BinaryPlainValuesReader.readBytes(BinaryPlainValuesReader.java:40)
... 41 more
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]