410680876f1 opened a new issue #2838:
URL: https://github.com/apache/hudi/issues/2838


   I tried to upgrade Hudi to version 0.8.0 with `hoodie.metadata.enable` set to `true`, but an error is thrown when Spark writes data to Hudi.
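   For context, the write is a plain DataFrame upsert through the Hudi datasource. The snippet below is a minimal sketch of how such a writer is configured with the 0.8.0 option names; the table name, key fields, and path are placeholders, not values from the failing job:

   ```scala
   import org.apache.hudi.DataSourceWriteOptions._
   import org.apache.hudi.config.HoodieWriteConfig
   import org.apache.spark.sql.SaveMode

   // df is the batch of records to upsert, loaded elsewhere.
   df.write.format("hudi")
     .option(HoodieWriteConfig.TABLE_NAME, "my_table")     // placeholder table name
     .option(RECORDKEY_FIELD_OPT_KEY, "id")                // placeholder record key
     .option(PRECOMBINE_FIELD_OPT_KEY, "ts")               // placeholder precombine field
     .option(OPERATION_OPT_KEY, UPSERT_OPERATION_OPT_VAL)
     .option("hoodie.metadata.enable", "true")             // the setting that triggers the metadata bootstrap
     .mode(SaveMode.Append)
     .save("/path/to/table")                               // placeholder base path
   ```

   The error: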
   
   2021/04/16 14:49:03,837 [WARN][Class->HoodieBackedTableMetadataWriter][Method->bootstrapIfNeeded]: Metadata Table will need to be re-bootstrapped as no instants were found
   2021/04/16 14:49:04,047 [ERROR][Class->ApplicationMaster][Method->logError]: User class threw exception: org.apache.hudi.exception.HoodieMetadataException: Error syncing to metadata table.
   org.apache.hudi.exception.HoodieMetadataException: Error syncing to metadata table.
        at org.apache.hudi.client.SparkRDDWriteClient.syncTableMetadata(SparkRDDWriteClient.java:447)
        at org.apache.hudi.client.AbstractHoodieWriteClient.preWrite(AbstractHoodieWriteClient.java:400)
        at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:153)
        at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:214)
        at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:186)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:145)
        at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81)
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
        at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:696)
        at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:305)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:291)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:249)
        at com.leqee.sparktool.hoodie.HoodieData.saveData(HoodieData.scala:184)
        at com.leqee.sparktool.hoodie.HoodieData.upsert(HoodieData.scala:96)
        at com.leqee.sparktool.hoodie.Hoodie$class.upsert(Hoodie.scala:160)
        at com.leqee.sparktool.hoodie.HoodieData.upsert(HoodieData.scala:13)
        at com.leqee.datasync.synctool.sync.SyncTask.setLastSync(SyncTask.scala:235)
        at com.leqee.datasync.synctool.sync.SyncTask.batchSyncing(SyncTask.scala:159)
        at com.leqee.datasync.synctool.sync.DateTimeSync.syncAnalyze(DateTimeSync.scala:49)
        at com.leqee.datasync.synctool.sync.SyncTask.sync(SyncTask.scala:66)
        at com.leqee.datasync.synctool.sync.SyncProxy.com$leqee$datasync$synctool$sync$SyncProxy$$runSync(SyncProxy.scala:49)
        at com.leqee.datasync.synctool.sync.SyncProxy$$anonfun$workFunc$1.apply(SyncProxy.scala:55)
        at com.leqee.datasync.synctool.sync.SyncProxy$$anonfun$workFunc$1.apply(SyncProxy.scala:55)
        at com.leqee.datasync.synctool.sync.SyncProxy$$anonfun$runGroup$1.apply(SyncProxy.scala:81)
        at com.leqee.datasync.synctool.sync.SyncProxy$$anonfun$runGroup$1.apply(SyncProxy.scala:80)
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
        at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
        at com.leqee.datasync.synctool.sync.SyncProxy.runGroup(SyncProxy.scala:80)
        at com.leqee.datasync.synctool.SyncApp$.main(SyncApp.scala:61)
        at com.leqee.datasync.synctool.SyncApp.main(SyncApp.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:685)
   Caused by: java.lang.ArrayIndexOutOfBoundsException: 2
        at org.apache.hudi.common.fs.FSUtils.getCommitTime(FSUtils.java:153)
        at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$null$2(HoodieBackedTableMetadataWriter.java:316)
        at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:174)
        at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
        at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
        at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
        at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
        at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
        at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
        at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
        at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$bootstrapFromFilesystem$4(HoodieBackedTableMetadataWriter.java:318)
        at java.util.HashMap.forEach(HashMap.java:1289)
        at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.bootstrapFromFilesystem(HoodieBackedTableMetadataWriter.java:312)
        at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.bootstrapIfNeeded(HoodieBackedTableMetadataWriter.java:266)
        at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.initialize(SparkHoodieBackedTableMetadataWriter.java:91)
        at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:113)
        at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.<init>(SparkHoodieBackedTableMetadataWriter.java:62)
        at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.create(SparkHoodieBackedTableMetadataWriter.java:58)
        at org.apache.hudi.client.SparkRDDWriteClient.syncTableMetadata(SparkRDDWriteClient.java:444)
        ... 49 more
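
   Reading the `Caused by`, the bootstrap fails while `FSUtils.getCommitTime` parses the name of a data file found under the table path. My reading (a reconstruction, not the exact Hudi source) is that it takes the third `_`-separated token of the file name as the instant time, so any file whose name does not follow the `<fileId>_<writeToken>_<instantTime>.<ext>` pattern would fail with exactly this `ArrayIndexOutOfBoundsException: 2`:

   ```scala
   object CommitTimeParseSketch {
     // Reconstruction of the parsing behind FSUtils.getCommitTime (0.8.0):
     // the instant time is the third "_"-separated token of the base file name.
     def getCommitTime(fullFileName: String): String =
       fullFileName.split("_")(2).split("\\.")(0)

     def main(args: Array[String]): Unit = {
       // A well-formed base file name <fileId>_<writeToken>_<instantTime>.parquet
       // parses fine (file id and instant below are made up for illustration):
       println(getCommitTime("fileid0001_1-0-1_20210416144903.parquet")) // 20210416144903

       // A name with fewer than two "_" characters, e.g. a non-Hudi file left in a
       // partition or one from an older naming scheme, throws
       // java.lang.ArrayIndexOutOfBoundsException: 2, matching the trace above.
       println(getCommitTime("part-00000.parquet"))
     }
   }
   ```

   If that reading is right, scanning the partition directories for file names with fewer than two `_` characters should pinpoint the offending file(s).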
   
   
   **Environment Description**
   
   * Hudi version : 0.8.0

   * Spark version : 2.4.7

   * Hadoop version : 3.1.1
   
   
   

