[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Labels: hudi-on-call sev:critical user-support-issues (was: sev:critical user-support-issues) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, spark, writer-core >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Assignee: Alexey Kudinkin >Priority: Critical > Labels: hudi-on-call, sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Fix Version/s: (was: 0.11.0) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Critical > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Component/s: Writer Core > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration, Writer Core >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Critical > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Danny Chen updated HUDI-864: Fix Version/s: (was: 0.10.0) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Critical > Labels: sev:critical, user-support-issues > Fix For: 0.11.0 > > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Priority: Critical (was: Blocker) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Critical > Labels: sev:critical, user-support-issues > Fix For: 0.10.0 > > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Status: Open (was: New) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Blocker > Labels: sev:critical, user-support-issues > Fix For: 0.10.0 > > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Fix Version/s: 0.10.0 > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Blocker > Labels: sev:critical, user-support-issues > Fix For: 0.10.0 > > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Affects Version/s: 0.6.0 0.5.3 0.7.0 0.8.0 > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.6.0, 0.5.3, 0.7.0, 0.8.0, 0.9.0 >Reporter: Roland Johann >Priority: Blocker > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Affects Version/s: 0.9.0 > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2, 0.9.0 >Reporter: Roland Johann >Priority: Blocker > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by:
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Priority: Blocker (was: Major) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Blocker > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by:
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Labels: sev:critical user-support-issues (was: sev:high user-support-issues) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: sev:critical, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Component/s: Spark Integration > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core, Spark Integration >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: sev:high, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by:
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Component/s: Common Core > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: sev:high, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException: >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Raymond Xu updated HUDI-864: Labels: sev:high user-support-issues (was: sevv:high user-support-issues) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: sev:high, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException:
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] sivabalan narayanan updated HUDI-864: - Labels: sevv:high user-support-issues (was: user-support-issues) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: sevv:high, user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by:
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Labels: user-support-issues (was: ) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi > Issue Type: Bug >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > Labels: user-support-issues > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException: >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Labels: (was: bug-bash-0.6.0) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi (incubating) > Issue Type: Bug >Reporter: Roland Johann >Priority: Major > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Affects Version/s: 0.5.2 > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi (incubating) > Issue Type: Bug >Affects Versions: 0.5.2 >Reporter: Roland Johann >Priority: Major > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: >
[jira] [Updated] (HUDI-864) parquet schema conflict: optional binary (UTF8) is not a group
[ https://issues.apache.org/jira/browse/HUDI-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vinoth Chandar updated HUDI-864: Labels: bug-bash-0.6.0 (was: ) > parquet schema conflict: optional binary (UTF8) is not a group > --- > > Key: HUDI-864 > URL: https://issues.apache.org/jira/browse/HUDI-864 > Project: Apache Hudi (incubating) > Issue Type: Bug >Reporter: Roland Johann >Priority: Major > Labels: bug-bash-0.6.0 > > When dealing with struct types like this > {code:json} > { > "type": "struct", > "fields": [ > { > "name": "categoryResults", > "type": { > "type": "array", > "elementType": { > "type": "struct", > "fields": [ > { > "name": "categoryId", > "type": "string", > "nullable": true, > "metadata": {} > } > ] > }, > "containsNull": true > }, > "nullable": true, > "metadata": {} > } > ] > } > {code} > The second ingest batch throws that exception: > {code} > ERROR [Executor task launch worker for task 15] > commit.BaseCommitActionExecutor (BaseCommitActionExecutor.java:264) - Error > upserting bucketType UPDATE for partition :0 > org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: operation has failed > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdateInternal(CommitActionExecutor.java:100) > at > org.apache.hudi.table.action.commit.CommitActionExecutor.handleUpdate(CommitActionExecutor.java:76) > at > org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor.handleUpdate(DeltaCommitActionExecutor.java:73) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleUpsertPartition(BaseCommitActionExecutor.java:258) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.handleInsertPartition(BaseCommitActionExecutor.java:271) > at > org.apache.hudi.table.action.commit.BaseCommitActionExecutor.lambda$execute$caffe4c4$1(BaseCommitActionExecutor.java:104) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$25.apply(RDD.scala:853) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337) > at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: >