rubenssoto edited a comment on issue #1679:
URL: https://github.com/apache/hudi/issues/1679#issuecomment-785433334
I give another try on
EMR 5.32
Hudi 0.6.0
**First ERROR:**
`21/02/24 22:27:25 WARN FileUtils: Error setting permissions of
hdfs://ip-10-0-28-211.us-west-2.compute.internal:8020/user/spark/warehouse/raw_courier_api_hudi.db
java.io.IOException: Unable to set permissions of
hdfs://ip-10-0-28-211.us-west-2.compute.internal:8020/user/spark/warehouse/raw_courier_api_hudi.db
at
org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:758)
at org.apache.hadoop.hive.common.FileUtils.mkdir(FileUtils.java:527)
at org.apache.hadoop.hive.metastore.Warehouse.mkdirs(Warehouse.java:201)
at
com.amazonaws.glue.catalog.util.MetastoreClientUtils.makeDirs(MetastoreClientUtils.java:43)
at
com.amazonaws.glue.catalog.metastore.GlueMetastoreClientDelegate.createDatabase(GlueMetastoreClientDelegate.java:236)
at
com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient.createDatabase(AWSCatalogMetastoreClient.java:272)
at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:316)
at
org.apache.hadoop.hive.ql.exec.DDLTask.createDatabase(DDLTask.java:3895)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:271)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1653)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1412)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1195)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:384)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:367)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:357)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:121)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:94)
at
org.apache.hudi.HoodieSparkSqlWriter$.org$apache$hudi$HoodieSparkSqlWriter$$syncHive(HoodieSparkSqlWriter.scala:321)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:363)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:359)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at
org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:359)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:417)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:205)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:125)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:173)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:169)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:197)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:194)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:114)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:112)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$executeQuery$1(SQLExecution.scala:83)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1$$anonfun$apply$1.apply(SQLExecution.scala:94)
at
org.apache.spark.sql.execution.QueryExecutionMetrics$.withMetrics(QueryExecutionMetrics.scala:141)
at
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$withMetrics(SQLExecution.scala:178)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:93)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:200)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:92)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:696)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:305)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:291)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:249)
at hudiwriter.HudiWriter.createHudiTable(HudiWriter.scala:48)
at hudiwriter.HudiContext.writeToHudi(HudiContext.scala:38)
at jobs.TableProcessor.start(TableProcessor.scala:77)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply$mcV$sp(TableProcessorWrapper.scala:23)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply(TableProcessorWrapper.scala:23)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply(TableProcessorWrapper.scala:23)
at
scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at
scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at
java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at
java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
at
java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
Caused by: java.lang.NumberFormatException: For input string:
"testingforemptydefaultvalue"
at
java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:580)
at java.lang.Integer.parseInt(Integer.java:615)
at org.apache.hadoop.conf.Configuration.getInts(Configuration.java:1402)
at
org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:332)
at
org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:355)
at org.apache.hadoop.fs.FsShell.init(FsShell.java:96)
at org.apache.hadoop.fs.FsShell.run(FsShell.java:296)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure.run(HadoopShimsSecure.java:377)
at
org.apache.hadoop.hive.shims.Hadoop23Shims.setFullFileStatus(Hadoop23Shims.java:729)
... 66 more`
Second ERROR:
`Exception in thread "ForkJoinPool-1-worker-2"
java.lang.NoClassDefFoundError: org/json/JSONException
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:10847)
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:10047)
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10128)
at
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:209)
at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:227)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:424)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:308)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1122)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1170)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:384)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:367)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:357)
at
org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:262)
at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:176)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:130)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:94)
at
org.apache.hudi.HoodieSparkSqlWriter$.org$apache$hudi$HoodieSparkSqlWriter$$syncHive(HoodieSparkSqlWriter.scala:321)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:363)
at
org.apache.hudi.HoodieSparkSqlWriter$$anonfun$metaSync$2.apply(HoodieSparkSqlWriter.scala:359)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at
org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:359)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:417)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:205)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:125)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:173)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:169)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:197)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:194)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:114)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:112)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:696)
at
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$executeQuery$1(SQLExecution.scala:83)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1$$anonfun$apply$1.apply(SQLExecution.scala:94)
at
org.apache.spark.sql.execution.QueryExecutionMetrics$.withMetrics(QueryExecutionMetrics.scala:141)
at
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$withMetrics(SQLExecution.scala:178)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:93)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:200)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:92)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:696)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:305)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:291)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:249)
at hudiwriter.HudiWriter.createHudiTable(HudiWriter.scala:48)
at hudiwriter.HudiContext.writeToHudi(HudiContext.scala:38)
at jobs.TableProcessor.start(TableProcessor.scala:77)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply$mcV$sp(TableProcessorWrapper.scala:23)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply(TableProcessorWrapper.scala:23)
at
TableProcessorWrapper$$anonfun$1$$anonfun$apply$1.apply(TableProcessorWrapper.scala:23)
at
scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at
scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at
java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at
java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
at
java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
Caused by: java.lang.ClassNotFoundException: org.json.JSONException
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 64 more`
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]