AbhijeetSachdev1 commented on issue #8210:
URL: https://github.com/apache/hudi/issues/8210#issuecomment-1478453315

   The problem lies in the archive files. When I tried to merge them, I got 
the same exception.
   
   Is there any way to clean up the archive files? I cannot find any 
configuration for this. Please suggest.
   
   
   
   client token: N/A
         diagnostics: User class threw exception: 
org.apache.hudi.exception.HoodieCommitException: Failed to merge small archive 
files
        at 
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFiles(HoodieTimelineArchiver.java:354)
        at 
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFilesIfNecessary(HoodieTimelineArchiver.java:226)
        at 
org.apache.hudi.client.HoodieTimelineArchiver.archiveIfRequired(HoodieTimelineArchiver.java:179)
        at 
org.apache.hudi.client.BaseHoodieWriteClient.archive(BaseHoodieWriteClient.java:909)
        at 
org.apache.hudi.client.BaseHoodieWriteClient.autoArchiveOnCommit(BaseHoodieWriteClient.java:629)
        at 
org.apache.hudi.client.BaseHoodieWriteClient.postCommit(BaseHoodieWriteClient.java:534)
        at 
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:237)
        at 
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:125)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:714)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:340)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:144)
        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:103)
        at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
        at 
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
        at 
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:114)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139)
        at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
        at 
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:100)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:96)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:615)
        at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:177)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:615)
        at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
        at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
        at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
        at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
        at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:591)
        at 
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:96)
        at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:83)
        at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:81)
        at 
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:124)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
        at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
        at 
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:363)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
        at 
com.amazon.fdvstreams.v2.hudi.HoodieAdapter.saveDf(HoodieAdapter.scala:54)
        at 
com.amazon.fdvstreams.v2.hudi.HoodieAdapter.upsert(HoodieAdapter.scala:42)
        at 
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.processHoodieUpsert(DefaultDataFrameCompactor.scala:72)
        at 
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.upsertDf(DefaultDataFrameCompactor.scala:43)
        at 
com.amazon.fdvstreams.v3.plugins.load.DefaultDataFrameCompactor.compactDf(DefaultDataFrameCompactor.scala:37)
        at 
com.amazon.fdvstreams.v3.CompactionJob$.$anonfun$run$2(CompactionJob.scala:50)
        at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
        at scala.collection.Iterator.foreach(Iterator.scala:943)
        at scala.collection.Iterator.foreach$(Iterator.scala:943)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
        at com.amazon.fdvstreams.v3.CompactionJob$.run(CompactionJob.scala:53)
        at com.amazon.fdvstreams.v3.CompactionJob$.main(CompactionJob.scala:32)
        at com.amazon.fdvstreams.v3.CompactionJob.main(CompactionJob.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:742)
   Caused by: org.apache.avro.AvroTypeException: Invalid default for field 
operationType: "" not a ["null",{"type":"string","avro.java.string":"String"}]
        at org.apache.avro.Schema.validateDefault(Schema.java:1581)
        at org.apache.avro.Schema.access$500(Schema.java:92)
        at org.apache.avro.Schema$Field.<init>(Schema.java:552)
        at org.apache.avro.Schema.parse(Schema.java:1688)
        at org.apache.avro.Schema.parse(Schema.java:1765)
        at org.apache.avro.Schema.parse(Schema.java:1678)
        at org.apache.avro.Schema$Parser.parse(Schema.java:1433)
        at org.apache.avro.Schema$Parser.parse(Schema.java:1421)
        at 
org.apache.hudi.common.table.log.block.HoodieAvroDataBlock$RecordIterator.getInstance(HoodieAvroDataBlock.java:180)
        at 
org.apache.hudi.common.table.log.block.HoodieAvroDataBlock.deserializeRecords(HoodieAvroDataBlock.java:141)
        at 
org.apache.hudi.common.table.log.block.HoodieDataBlock.readRecordsFromBlockPayload(HoodieDataBlock.java:189)
        at 
org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:147)
        at 
org.apache.hudi.client.HoodieTimelineArchiver.mergeArchiveFiles(HoodieTimelineArchiver.java:345)
        ... 63 more


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to