Limess edited a comment on issue #2688:
URL: https://github.com/apache/hudi/issues/2688#issuecomment-844994858
I've seen the same issue with `emr-5.33.0` - `Spark 2.4.7`, `Hudi 0.7.0`. I
tried to load the above dependencies and that didn't resolve the issue (I may
have done this incorrectly, we're using pyspark and we're new to EMR/spark in
general)
```python
hudi_options = {
"hoodie.table.name": hudi_table_name,
"hoodie.datasource.write.table.type": "MERGE_ON_READ",
"hoodie.datasource.write.operation": "upsert",
"hoodie.datasource.write.recordkey.field": "_id",
"hoodie.datasource.write.precombine.field": "processed_date",
"hoodie.datasource.write.partitionpath.field": "story_published_date",
"hoodie.insert.shuffle.parallelism": "12",
"hoodie.upsert.shuffle.parallelism": "12",
"hoodie.bulkinsert.shuffle.parallelism": "12",
"hoodie.deltastreamer.keygen.timebased.timestamp.type": "DATE_STRING",
"hoodie.deltastreamer.keygen.timebased.input.dateformat":
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
"hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex":
",",
"hoodie.deltastreamer.keygen.timebased.input.timezone": "",
"hoodie.deltastreamer.keygen.timebased.output.dateformat": "yyyy-MM-dd",
"hoodie.deltastreamer.keygen.timebased.output.timezone": "UTC",
"hoodie.datasource.write.hive_style_partitioning": "true",
"hoodie.datasource.hive_sync.enable": "true",
"hoodie.datasource.hive_sync.database":
"pipeline_reprocessing_hudi_experiment",
"hoodie.datasource.hive_sync.table": hudi_table_name,
"hoodie.datasource.hive_sync.partition_fields": "story_published_date",
"hoodie.datasource.hive_sync.use_jdbc": "false",
"hoodie.consistency.check.enabled": True
}
```
```
py4j.protocol.Py4JJavaError: An error occurred while calling o127.save.
: java.lang.NoSuchMethodError: org.apache.hadoop.hive.ql.Driver.close()V
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:417)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:384)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:374)
at
org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:263)
at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:181)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:136)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:98)
at
org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:355)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4(HoodieSparkSqlWriter.scala:403)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4$adapted(HoodieSparkSqlWriter.scala:399)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:399)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:460)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:218)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:185)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:223)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:220)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:134)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:133)
at
org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
```
EDIT: also a similar issue on `emr-5.32.0`.
```
py4j.protocol.Py4JJavaError: An error occurred while calling o127.save.
: java.lang.NoSuchMethodError: org.apache.hadoop.hive.ql.Driver.close()V
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:417)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:384)
at
org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:374)
at
org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:263)
at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:181)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:136)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:98)
at
org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:355)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4(HoodieSparkSqlWriter.scala:403)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4$adapted(HoodieSparkSqlWriter.scala:399)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:399)
at
org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:460)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:218)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:185)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:223)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:220)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:134)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:133)
at
org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]