rubenssoto opened a new issue #2688:
URL: https://github.com/apache/hudi/issues/2688
Hello,

I'm trying to sync a Hudi table with Hive on EMR 6.2 using the Hive metastore.

EMR version: 6.2
Hudi version: 0.6.0 / 0.7.0 (stack traces for both below)
Spark session:

```python
spark_session = (
    SparkSession.builder.appName(spark_application_name)
    .enableHiveSupport()
    .getOrCreate()
)
```
```python
hudi_options_bulk = {
    'hoodie.table.name': tableName,
    'hoodie.datasource.write.recordkey.field': 'id',
    'hoodie.datasource.write.table.name': tableName,
    'hoodie.datasource.write.operation': 'bulk_insert',
    'hoodie.datasource.write.precombine.field': 'LineCreatedTimestamp',
    'hoodie.bulkinsert.shuffle.parallelism': 20,
    'hoodie.parquet.small.file.limit': 268435456,
    'hoodie.parquet.max.file.size': 536870912,
    'hoodie.parquet.block.size': 268435456,
    'hoodie.copyonwrite.record.size.estimate': 512,
    'hoodie.datasource.hive_sync.enable': 'true',
    'hoodie.datasource.write.hive_style_partitioning': 'true',
    'hoodie.datasource.hive_sync.use_jdbc': 'false',
    'hoodie.datasource.hive_sync.table': tableName,
    'hoodie.datasource.hive_sync.database': 'raw_courier_api_hudi',
    'hoodie.datasource.hive_sync.jdbcurl': 'jdbc:hive2://ip-10-0-48-177.us-west-2.compute.internal:10000'
}
```
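
For completeness, the write itself is a plain datasource save, roughly like the sketch below; the DataFrame `df` and the S3 target path are placeholders, not the exact values from my job:

```python
# Rough shape of the write that triggers the Hive sync on commit.
# "df" and the output path below are placeholders.
(
    df.write.format("hudi")
    .options(**hudi_options_bulk)
    .mode("append")
    .save("s3://<bucket>/raw_courier_api_hudi/" + tableName)
)
```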
**Hudi 0.7.0**
```
py4j.protocol.Py4JJavaError: An error occurred while calling o118.save.
: java.lang.NoClassDefFoundError: org/apache/calcite/rel/type/RelDataTypeSystem
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory.get(SemanticAnalyzerFactory.java:318)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:484)
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1457)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:401)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:384)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:374)
    at org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:263)
    at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:181)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:136)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:94)
    at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:355)
    at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4(HoodieSparkSqlWriter.scala:403)
    at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4$adapted(HoodieSparkSqlWriter.scala:399)
    at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
    at org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:399)
    at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:460)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:218)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:124)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:123)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:963)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
    at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:132)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:132)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:131)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:963)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:415)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:399)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:288)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: org.apache.calcite.rel.type.RelDataTypeSystem
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
    ... 59 more
```
**Hudi 0.6.0**
```
An error was encountered:
An error occurred while calling o122.save.
: java.lang.NoSuchMethodError: org.apache.hadoop.hive.ql.Driver.close()V
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:400)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:367)
    at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQL(HoodieHiveClient.java:357)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:121)
    at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:94)
    at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:329)
    at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4(HoodieSparkSqlWriter.scala:371)
    at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4$adapted(HoodieSparkSqlWriter.scala:367)
    at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
    at org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:367)
    at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:425)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:198)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:126)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:124)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:123)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:963)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
    at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:132)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:104)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:227)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:132)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:131)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:963)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:415)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:399)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:288)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 827, in save
    self._jwrite.save(path)
  File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 128, in deco
    return f(*a, **kw)
  File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o122.save.
: java.lang.NoSuchMethodError: org.apache.hadoop.hive.ql.Driver.close()V
```