rubenssoto commented on issue #2515:
URL: https://github.com/apache/hudi/issues/2515#issuecomment-773020297


   I made a new attempt; here is the full setup:
   emr-6.2.0
   spark 3.0.1
   
   ```
   git clone https://github.com/apache/hudi.git && cd hudi
   mvn clean package -DskipTests -Dspark3
   ```
   
   spark-submit:
   ```
   spark-submit --deploy-mode cluster \
     --conf spark.executor.cores=5 \
     --conf spark.executor.memoryOverhead=2000 \
     --conf spark.executor.memory=32g \
     --conf spark.yarn.maxAppAttempts=1 \
     --conf spark.dynamicAllocation.maxExecutors=4 \
     --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
     --packages org.apache.spark:spark-avro_2.12:2.4.4 \
     --jars s3://dl/lib/spark-daria_2.12-0.38.2.jar,s3://ze-data-etl/lib/hudi-spark-bundle_2.12-0.8.0-SNAPSHOT.jar \
     --class TableProcessorWrapper \
     s3://dl//code/projects/data_projects/batch_processor_engine/batch-processor-engine_2.12-3.0.1_0.5.jar \
     courier_api_group01
   ```
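   
   One thing worth flagging in that command: `org.apache.spark:spark-avro_2.12:2.4.4` is built against Spark 2.4, while the cluster runs Spark 3.0.1. The version-aligned flag would be the one below, although, as I note at the end of this comment, switching to the 3.0.1 artifact alone did not make the error go away:
   
   ```
   --packages org.apache.spark:spark-avro_2.12:3.0.1
   ```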
   
   
   ```
   import org.apache.spark.sql.SparkSession
   
   trait SparkSessionWrapper extends Serializable {
   
     // Shared session; spark-avro and the Hudi bundle are also supplied
     // via --packages/--jars on spark-submit above.
     val spark: SparkSession = SparkSession.builder
       .appName("Batch Processor Engine")
       .config("spark.jars.packages", "org.apache.spark:spark-avro_2.12:2.4.4")
       .config("spark.jars", "s3://dl/lib/hudi-spark-bundle_2.12-0.8.0-SNAPSHOT.jar")
       .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
       .enableHiveSupport()
       .getOrCreate()
   }
   ```
   
   Same error :(
   
   
   ```
   Error on Table: deliveryman_route_history, Error Message: org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
   java.lang.Exception: Error on Table: deliveryman_route_history, Error Message: org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
        at jobs.TableProcessor.start(TableProcessor.scala:102)
        at TableProcessorWrapper$.$anonfun$main$2(TableProcessorWrapper.scala:23)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
        at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
        at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
        at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
        at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
   ```
   
   ```
   21/02/04 04:05:40 ERROR HoodieTimelineArchiveLog: Failed to archive commits, .commit file: 20210203023129.commit.requested
   java.lang.NullPointerException: null of string of map of union in field extraMetadata of org.apache.hudi.avro.model.HoodieCommitMetadata of union in field hoodieCommitMetadata of org.apache.hudi.avro.model.HoodieArchivedMetaEntry
        at org.apache.avro.generic.GenericDatumWriter.npe(GenericDatumWriter.java:145)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:139)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:62)
        at org.apache.hudi.common.table.log.block.HoodieAvroDataBlock.serializeRecords(HoodieAvroDataBlock.java:106)
        at org.apache.hudi.common.table.log.block.HoodieDataBlock.getContentBytes(HoodieDataBlock.java:97)
        at org.apache.hudi.common.table.log.HoodieLogFormatWriter.appendBlocks(HoodieLogFormatWriter.java:164)
        at org.apache.hudi.common.table.log.HoodieLogFormatWriter.appendBlock(HoodieLogFormatWriter.java:142)
        at org.apache.hudi.table.HoodieTimelineArchiveLog.writeToFile(HoodieTimelineArchiveLog.java:361)
        at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:311)
        at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:138)
        at org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:426)
        at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:188)
        at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:110)
        at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:443)
        at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:218)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
        at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:124)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:123)
        at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:963)
        at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
        at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
        at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:132)
        at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
        at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:132)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:131)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:963)
        at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:415)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:399)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:288)
        at hudiwriter.HudiWriter.merge(HudiWriter.scala:72)
        at hudiwriter.HudiContext.writeToHudi(HudiContext.scala:35)
        at jobs.TableProcessor.start(TableProcessor.scala:84)
        at TableProcessorWrapper$.$anonfun$main$2(TableProcessorWrapper.scala:23)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
        at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
        at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
        at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
        at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
   Caused by: java.lang.NullPointerException
        at org.apache.avro.io.Encoder.writeString(Encoder.java:121)
        at org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:267)
        at org.apache.avro.generic.GenericDatumWriter.writeString(GenericDatumWriter.java:262)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:128)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.writeMap(GenericDatumWriter.java:234)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:121)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:125)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:166)
        at org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:156)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:118)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:125)
        at org.apache.avro.generic.GenericDatumWriter.write(GenericDatumWriter.java:75)
        at org.apache.avro.generic.GenericDatumWriter.writeField(GenericDatumWriter.java:166)
        at org.apache.avro.generic.GenericDatumWriter.writeRecord(GenericDatumWriter.java:156)
        at org.apache.avro.generic.GenericDatumWriter.writeWithoutConversion(GenericDatumWriter.java:118)
        ... 59 more
   ```
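   
   The NPE happens while the archiver re-serializes an instant into its Avro model, on a null string value inside the `extraMetadata` map, and the log names `20210203023129.commit.requested` specifically. To see what the archiver is reading, I could dump the timeline files for that instant; below is a minimal sketch (the local path is an assumption, e.g. after copying the table's `.hoodie` folder down from S3):
   
   ```
   import java.nio.file.{Files, Paths}
   
   // Minimal sketch: list the timeline files for the failing instant and flag
   // empty ones. An empty *.commit.requested would come back with null
   // metadata fields, which could match the NPE on extraMetadata.
   object InspectInstant {
     def main(args: Array[String]): Unit = {
       // Assumed local copy of .../deliveryman_route_history/.hoodie
       val hoodieDir = Paths.get("./deliveryman_route_history/.hoodie")
       Files.list(hoodieDir)
         .filter(p => p.getFileName.toString.startsWith("20210203023129"))
         .forEach { p =>
           val size = Files.size(p)
           println(s"${p.getFileName} -> $size bytes")
           if (size == 0) println("   (empty instant file)")
         }
     }
   }
   ```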
   
   
   ```
   Map(
     hoodie.datasource.hive_sync.database -> raw_courier_api_hudi,
     hoodie.parquet.small.file.limit -> 67108864,
     hoodie.copyonwrite.record.size.estimate -> 1024,
     hoodie.datasource.write.precombine.field -> LineCreatedTimestamp,
     hoodie.datasource.hive_sync.partition_fields -> partitionpath,
     hoodie.datasource.hive_sync.partition_extractor_class -> org.apache.hudi.hive.MultiPartKeysValueExtractor,
     hoodie.parquet.max.file.size -> 134217728,
     hoodie.parquet.block.size -> 67108864,
     hoodie.datasource.hive_sync.table -> customer_notification,
     hoodie.datasource.write.operation -> upsert,
     hoodie.datasource.hive_sync.enable -> true,
     hoodie.datasource.write.recordkey.field -> id,
     hoodie.table.name -> customer_notification,
     hoodie.datasource.hive_sync.jdbcurl -> jdbc:hive2://ip-10-0-26-106.us-west-2.compute.internal:10000,
     hoodie.datasource.write.hive_style_partitioning -> true,
     hoodie.consistency.check.enabled -> true,
     hoodie.datasource.write.table.name -> customer_notification,
     hoodie.datasource.write.keygenerator.class -> org.apache.hudi.keygen.SimpleKeyGenerator,
     hoodie.upsert.shuffle.parallelism -> 50
   )
   ```
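   
   For context, these options reach Hudi through the standard datasource write. Below is a minimal sketch of that call, using the `spark` session from the trait above; the source path and table base path are illustrative placeholders, not copied from my job:
   
   ```
   // Sketch only: source and target paths are placeholders.
   val hudiOptions: Map[String, String] = Map(
     "hoodie.table.name" -> "customer_notification",
     "hoodie.datasource.write.recordkey.field" -> "id",
     "hoodie.datasource.write.precombine.field" -> "LineCreatedTimestamp",
     "hoodie.datasource.write.operation" -> "upsert"
   )
   
   val df = spark.read.json("s3://dl/raw/courier_api/customer_notification/") // assumed source
   df.write
     .format("hudi")
     .options(hudiOptions) // in the real job, the full Map shown above
     .mode("append")
     .save("s3://dl/tables/customer_notification") // assumed base path
   ```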
   
   
   I'm using org.apache.spark:spark-avro_2.12:2.4.4 on Spark 3; could that be the problem? I updated to spark-avro_2.12-3.0.1.jar, but the problem was not solved.
   
   Do I need to recreate the tables?
   
   Thank you so much!

