AbhijeetSachdev1 commented on issue #8210:
URL: https://github.com/apache/hudi/issues/8210#issuecomment-1478453315
The problem lies in the archive files. When I tried to merge them, I still
got the same exception.
Is there any way to clean up the archive files? I cannot find any
configuration for this. Please suggest.
client token: N/A
diagnostics: User class threw exception:
org.apache.hudi.exception.HoodieCommitException: Failed to merge small archive
files
at
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFiles(HoodieTimelineArchiver.java:354)
at
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFilesIfNecessary(HoodieTimelineArchiver.java:226)
at
org.apache.hudi.client.HoodieTimelineArchiver.archiveIfRequired(HoodieTimelineArchiver.java:179)
at
org.apache.hudi.client.BaseHoodieWriteClient.archive(BaseHoodieWriteClient.java:909)
at
org.apache.hudi.client.BaseHoodieWriteClient.autoArchiveOnCommit(BaseHoodieWriteClient.java:629)
at
org.apache.hudi.client.BaseHoodieWriteClient.postCommit(BaseHoodieWriteClient.java:534)
at
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:237)
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:125)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:714)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:340)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:144)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:103)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
at
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:114)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:100)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:96)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:615)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:177)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:615)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:591)
at
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:96)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:83)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:81)
at
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:124)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:363)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
at
com.amazon.fdvstreams.v2.hudi.HoodieAdapter.saveDf(HoodieAdapter.scala:54)
at
com.amazon.fdvstreams.v2.hudi.HoodieAdapter.upsert(HoodieAdapter.scala:42)
at
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.processHoodieUpsert(DefaultDataFrameCompactor.scala:72)
at
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.upsertDf(DefaultDataFrameCompactor.scala:43)
at
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.compactDf(DefaultDataFrameCompactor.scala:37)
at
com.amazon.fdvstreams.v3.CompactionJob$.$anonfun$run$2(CompactionJob.scala:50)
at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at com.amazon.fdvstreams.v3.CompactionJob$.run(CompactionJob.scala:53)
at com.amazon.fdvstreams.v3.CompactionJob$.main(CompactionJob.scala:32)
at com.amazon.fdvstreams.v3.CompactionJob.main(CompactionJob.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:742)
Caused by: org.apache.avro.AvroTypeException: Invalid default for field
operationType: "" not a ["null",{"type":"string","avro.java.string":"String"}]
at org.apache.avro.Schema.validateDefault(Schema.java:1581)
at org.apache.avro.Schema.access$500(Schema.java:92)
at org.apache.avro.Schema$Field.<init>(Schema.java:552)
at org.apache.avro.Schema.parse(Schema.java:1688)
at org.apache.avro.Schema.parse(Schema.java:1765)
at org.apache.avro.Schema.parse(Schema.java:1678)
at org.apache.avro.Schema$Parser.parse(Schema.java:1433)
at org.apache.avro.Schema$Parser.parse(Schema.java:1421)
at
org.apache.hudi.common.table.log.block.HoodieAvroDataBlock$RecordIterator.getInstance(HoodieAvroDataBlock.java:180)
at
org.apache.hudi.common.table.log.block.HoodieAvroDataBlock.deserializeRecords(HoodieAvroDataBlock.java:141)
at
org.apache.hudi.common.table.log.block.HoodieDataBlock.readRecordsFromBlockPayload(HoodieDataBlock.java:189)
at
org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:147)
at
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFiles(HoodieTimelineArchiver.java:345)
... 63 more
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]