[
https://issues.apache.org/jira/browse/HUDI-7424?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HUDI-7424:
---------------------------------
Labels: pull-request-available (was: )
> Throw conversion error of Avro record properly for error table
> --------------------------------------------------------------
>
> Key: HUDI-7424
> URL: https://issues.apache.org/jira/browse/HUDI-7424
> Project: Apache Hudi
> Issue Type: Improvement
> Reporter: Ethan Guo
> Assignee: Ethan Guo
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.15.0, 1.0.0
>
>
> The following exception is thrown when trying to convert an Avro record to be
> sent to the error table:
> {code:java}
> Job aborted due to stage failure: Task 0 in stage 47.0 failed 4 times, most
> recent failure: Lost task 0.3 in stage 47.0 (TID 3129) (172.18.239.121
> executor 1): java.lang.NullPointerException: null of string in field *** of
> ***
> at
> org.apache.avro.generic.GenericDatumWriter.npe(GenericDatumWriter.java:184)
> at
> org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:178)
> at
> org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:83)
> at
> org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:73)
> at
> org.apache.hudi.avro.HoodieAvroUtils.avroToJsonHelper(HoodieAvroUtils.java:216)
> at
> org.apache.hudi.avro.HoodieAvroUtils.avroToJsonString(HoodieAvroUtils.java:199)
> at
> org.apache.hudi.utilities.streamer.HoodieStreamerUtils.lambda$null$60a9e756$1(HoodieStreamerUtils.java:111)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitions$1(JavaRDDLike.scala:153)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
> at
> org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:386)
> at
> org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1508)
> at
> org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1418)
> at
> org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1482)
> at
> org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1305)
> at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
> at org.apache.spark.scheduler.Task.run(Task.scala:131)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: java.lang.NullPointerException
> at org.apache.avro.io.Encoder.writeString(Encoder.java:130)
> at
> org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:346)
> at
> org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:338)
> at
> org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:151)
> at
> org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:83)
> at
> org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:221)
> at
> org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:210)
> at
> org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:131)
> ... 39 more
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)