Jonathanrodrigr12 commented on issue #7717: URL: https://github.com/apache/hudi/issues/7717#issuecomment-1962262653
Hi, I have the same problem, but I am using the HoodieMultiTableStreamer. **Description** I have a lot of parquet files, all of them have this struct. The first time I run the job in EMR Serverless the data is saved, but on the second attempt I get this error. **Expected behavior** The second write succeeds. **Environment Description** Hudi hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar Spark version : 3.4.1 EMR: 6.15.0 Stack Trace `org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0 at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:342) at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleInsertPartition(BaseSparkCommitActionExecutor.java:348) at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:259) at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:905) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:905) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:377) at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552) at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526) at 
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375) at org.apache.spark.rdd.RDD.iterator(RDD.scala:326) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) at org.apache.spark.scheduler.Task.run(Task.scala:141) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: org.apache.hudi.exception.HoodieException: org.apache.avro.SchemaParseException: Can't redefine: value at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:149) at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdateInternal(BaseSparkCommitActionExecutor.java:387) at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:369) at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:335) ... 
30 more Caused by: org.apache.avro.SchemaParseException: Can't redefine: value at org.apache.avro.Schema$Names.put(Schema.java:1586) at org.apache.avro.Schema$NamedSchema.writeNameRef(Schema.java:844) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1011) at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) at org.apache.avro.Schema$ArraySchema.toJson(Schema.java:1173) at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) at org.apache.avro.Schema.toString(Schema.java:433) at org.apache.avro.Schema.toString(Schema.java:405) at org.apache.avro.Schema.toString(Schema.java:396) at org.apache.parquet.avro.AvroReadSupport.setAvroReadSchema(AvroReadSupport.java:73) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:162) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:94) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:73) at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:126) ... 
33 more Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2974) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2910) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2909) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) ~[scala-library-2.12.15.jar:?] at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) ~[scala-library-2.12.15.jar:?] at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) ~[scala-library-2.12.15.jar:?] at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2909) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1263) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1263) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at scala.Option.foreach(Option.scala:407) ~[scala-library-2.12.15.jar:?] 
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1263) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3173) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3112) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3101) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1028) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.SparkContext.runJob(SparkContext.scala:2271) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.SparkContext.runJob(SparkContext.scala:2366) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$fold$1(RDD.scala:1172) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.withScope(RDD.scala:405) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.fold(RDD.scala:1166) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.DoubleRDDFunctions.$anonfun$sum$1(DoubleRDDFunctions.scala:36) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23) ~[scala-library-2.12.15.jar:?] 
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.withScope(RDD.scala:405) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:36) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.api.java.JavaDoubleRDD.sum(JavaDoubleRDD.scala:165) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:804) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:446) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.ingestOnce(HoodieStreamer.java:840) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.ingestion.HoodieIngestionService.startIngestion(HoodieIngestionService.java:72) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.streamer.HoodieStreamer.sync(HoodieStreamer.java:205) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.streamer.HoodieMultiTableStreamer.sync(HoodieMultiTableStreamer.java:456) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.utilities.streamer.HoodieMultiTableStreamer.main(HoodieMultiTableStreamer.java:281) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_392] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
~[?:1.8.0_392] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_392] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_392] at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1066) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1158) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1167) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] Caused by: org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0 at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:342) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleInsertPartition(BaseSparkCommitActionExecutor.java:348) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:259) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) 
~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:377) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:326) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_392] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_392] at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_392] Caused by: org.apache.hudi.exception.HoodieException: org.apache.avro.SchemaParseException: Can't redefine: value at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:149) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdateInternal(BaseSparkCommitActionExecutor.java:387) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:369) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at 
org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:335) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleInsertPartition(BaseSparkCommitActionExecutor.java:348) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:259) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:377) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at 
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:326) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_392] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_392] at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_392] Caused by: org.apache.avro.SchemaParseException: Can't 
redefine: value at org.apache.avro.Schema$Names.put(Schema.java:1586) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$NamedSchema.writeNameRef(Schema.java:844) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1011) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$ArraySchema.toJson(Schema.java:1173) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:1278) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:1039) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:1023) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema.toString(Schema.java:433) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema.toString(Schema.java:405) ~[avro-1.11.1.jar:1.11.1] at org.apache.avro.Schema.toString(Schema.java:396) ~[avro-1.11.1.jar:1.11.1] at org.apache.parquet.avro.AvroReadSupport.setAvroReadSchema(AvroReadSupport.java:73) ~[org.apache.parquet_parquet-avro-1.12.3.jar:1.12.3] at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:162) 
~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:94) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.io.storage.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:73) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:126) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdateInternal(BaseSparkCommitActionExecutor.java:387) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:369) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:335) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleInsertPartition(BaseSparkCommitActionExecutor.java:348) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:259) ~[hudi-utilities-bundle_2.12-0.14.0-amzn-0.jar:0.14.0-amzn-0] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:905) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:377) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:326) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at 
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.scheduler.Task.run(Task.scala:141) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566) ~[spark-core_2.12-3.4.1-amzn-2.jar:3.4.1-amzn-2] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_392] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_392]` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
