andykrk opened a new issue #4604:
URL: https://github.com/apache/hudi/issues/4604
Environment Description
Hudi version : 0.9.0
Spark version : 2.4.7
Storage (HDFS/S3/GCS..) : S3
Config:
'hoodie.datasource.write.precombine.field': 'col99',
'hoodie.datasource.write.recordkey.field': 'col1',
'hoodie.table.name': 'target_table',
'hoodie.insert.shuffle.parallelism': 50,
'hoodie.upsert.shuffle.parallelism': 50,
'hoodie.bulkinsert.shuffle.parallelism': 50,
'hoodie.finalize.write.parallelism': 50,
'hoodie.keep.min.commits': 29,
'hoodie.keep.max.commits': 30,
'hoodie.cleaner.commits.retained': 28,
'hoodie.metadata.enable': False,
'hoodie.datasource.write.table.type': 'COPY_ON_WRITE',
'hoodie.datasource.write.operation': 'UPSERT',
'hoodie.datasource.write.row.writer.enable': False}
Issue Description:
I am trying to insert the data into the target table while also performing
archiving and cleaning, as I currently have more commits than defined in the
config. I tested this same process with several variations of the write
configuration to determine if the write config was causing the issue. These
variations included enabling/disabling the row writer and the metadata table,
and I eventually worked my way down to removing all non-required write
configuration options. However, this did not seem to resolve the issue. During
the run I am getting the following exception:
An error occurred while calling o286.save.
: org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
at
org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:318)
at
org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128)
at
org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439)
at
org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191)
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:124)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:614)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:272)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:164)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:156)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException: null of string of map of union in
field extraMetadata of org.apache.hudi.avro.model.HoodieCommitMetadata of union
in field hoodieCommitMetadata of
org.apache.hudi.avro.model.HoodieArchivedMetaEntry
at
org.apache.avro.generic.GenericDatumWriter.npe(GenericDatumWriter.java:145)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:139)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:62)
at
org.apache.hudi.common.table.log.block.HoodieAvroDataBlock.serializeRecords(HoodieAvroDataBlock.java:106)
at
org.apache.hudi.common.table.log.block.HoodieDataBlock.getContentBytes(HoodieDataBlock.java:97)
at
org.apache.hudi.common.table.log.HoodieLogFormatWriter.appendBlocks(HoodieLogFormatWriter.java:164)
at
org.apache.hudi.common.table.log.HoodieLogFormatWriter.appendBlock(HoodieLogFormatWriter.java:142)
at
org.apache.hudi.table.HoodieTimelineArchiveLog.writeToFile(HoodieTimelineArchiveLog.java:334)
at
org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:307)
... 39 more
Caused by: java.lang.NullPointerException
at org.apache.avro.io.Encoder.writeString(Encoder.java:121)
at
org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:267)
at
org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:262)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:128)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.writeMap(GenericDatumWriter.java:234)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:121)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:125)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:166)
at
org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:156)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:118)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:125)
at
org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
at
org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:166)
at
org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:156)
at
org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:118)
... 47 more
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]