JosefinaArayaTapia commented on issue #5389:
URL: https://github.com/apache/hudi/issues/5389#issuecomment-1109905226
Hi @codope
Here is the table definition:
```
CREATE EXTERNAL TABLE `bda.cliente`(
  `_hoodie_commit_time` string,
  `_hoodie_commit_seqno` string,
  `_hoodie_record_key` string,
  `_hoodie_partition_path` string,
  `_hoodie_file_name` string,
  `id` decimal(12,0),
  `last_update_time` timestamp,
  `cod_estado` string
)
PARTITIONED BY (
  `periodo` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
's3a://bck/HUDI/CLIENTE'
TBLPROPERTIES (
'bucketing_version'='2',
'last_commit_time_sync'='20220425221015',
'last_modified_by'='hive',
'last_modified_time'='1650924207')
```
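(As a sanity check that the failure described below is in the metastore layer and not in the table itself, one could read the Hudi table directly from its base path, bypassing Hive/Glue entirely; a minimal sketch, with the path taken from the `LOCATION` clause above:)

```python
# Sketch: load the Hudi table straight from S3, skipping the Glue/Hive
# metastore lookup entirely. The path is the LOCATION from the DDL above.
df = spark.read.format("hudi").load("s3a://bck/HUDI/CLIENTE")
df.select("id", "last_update_time", "cod_estado", "periodo").show(5)
```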
-----
In EMR 6.5 with Hudi 0.9.0, I have the following problem.
Configuration and database call:
```
%%configure -f
{
  "conf": {
    "spark.jars": "hdfs:///apps/hudi/lib/hudi-spark-bundle.jar,hdfs:///apps/hudi/lib/spark-avro.jar,hdfs:///apps/hudi/lib/aws-java-sdk-bundle-1.12.31.jar",
    "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
    "spark.sql.extensions": "org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
  }
}

spark.sql("use bda")
```
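(Side note on the cell above: the last entry in `spark.jars` pins `aws-java-sdk-bundle-1.12.31.jar` explicitly; a quick sketch to confirm what the session actually received, which may matter for the error below:)

```python
# Sketch: print the effective spark.jars so we can see exactly which AWS SDK
# bundle ended up on the session classpath alongside the Hudi jars.
print(spark.sparkContext.getConf().get("spark.jars", ""))
```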
```
An error was encountered:
An error occurred while calling o86.sql.
: java.lang.NoSuchMethodError: com.amazonaws.transform.JsonUnmarshallerContext.getCurrentToken()Lcom/amazonaws/thirdparty/jackson/core/JsonToken;
  at com.amazonaws.services.glue.model.transform.GetDatabaseResultJsonUnmarshaller.unmarshall(GetDatabaseResultJsonUnmarshaller.java:39)
  at com.amazonaws.services.glue.model.transform.GetDatabaseResultJsonUnmarshaller.unmarshall(GetDatabaseResultJsonUnmarshaller.java:29)
  at com.amazonaws.http.JsonResponseHandler.handle(JsonResponseHandler.java:118)
  at com.amazonaws.http.JsonResponseHandler.handle(JsonResponseHandler.java:43)
  at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1734)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1454)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1369)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1145)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:802)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:770)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:744)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:704)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:686)
  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:550)
  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:530)
  at com.amazonaws.services.glue.AWSGlueClient.doInvoke(AWSGlueClient.java:10640)
  at com.amazonaws.services.glue.AWSGlueClient.invoke(AWSGlueClient.java:10607)
  at com.amazonaws.services.glue.AWSGlueClient.invoke(AWSGlueClient.java:10596)
  at com.amazonaws.services.glue.AWSGlueClient.executeGetDatabase(AWSGlueClient.java:4466)
  at com.amazonaws.services.glue.AWSGlueClient.getDatabase(AWSGlueClient.java:4435)
  at com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient.doesDefaultDBExist(AWSCatalogMetastoreClient.java:238)
  at com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient.<init>(AWSCatalogMetastoreClient.java:151)
  at com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory.createMetaStoreClient(AWSGlueDataCatalogHiveClientFactory.java:20)
  at org.apache.hadoop.hive.ql.metadata.HiveUtils.createMetaStoreClient(HiveUtils.java:507)
  at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3746)
  at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3726)
  at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:3988)
  at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:251)
  at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:234)
  at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:402)
  at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:335)
  at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:315)
  at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:291)
  at org.apache.spark.sql.hive.client.HiveClientImpl.client(HiveClientImpl.scala:257)
  at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:283)
  at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
  at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
  at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
  at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:384)
  at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$databaseExists$1(HiveExternalCatalog.scala:249)
  at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
  at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:105)
  at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:249)
  at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:135)
  at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:125)
  at org.apache.spark.sql.internal.SharedState.isDatabaseExistent$1(SharedState.scala:169)
  at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:201)
  at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:153)
  at org.apache.spark.sql.hive.HiveSessionStateBuilder.$anonfun$catalog$2(HiveSessionStateBuilder.scala:52)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:99)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:99)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.setCurrentDatabase(SessionCatalog.scala:281)
  at org.apache.spark.sql.connector.catalog.CatalogManager.setCurrentNamespace(CatalogManager.scala:104)
  at org.apache.spark.sql.execution.datasources.v2.SetCatalogAndNamespaceExec.$anonfun$run$2(SetCatalogAndNamespaceExec.scala:36)
  at org.apache.spark.sql.execution.datasources.v2.SetCatalogAndNamespaceExec.$anonfun$run$2$adapted(SetCatalogAndNamespaceExec.scala:36)
  at scala.Option.foreach(Option.scala:407)
  at org.apache.spark.sql.execution.datasources.v2.SetCatalogAndNamespaceExec.run(SetCatalogAndNamespaceExec.scala:36)
  at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:40)
  at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:40)
  at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:46)
  at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:230)
  at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3751)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
  at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
  at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
  at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3749)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:230)
  at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:101)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:98)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
  at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
  at py4j.Gateway.invoke(Gateway.java:282)
  at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
  at py4j.commands.CallCommand.execute(CallCommand.java:79)
  at py4j.GatewayConnection.run(GatewayConnection.java:238)
  at java.lang.Thread.run(Thread.java:750)
Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/session.py", line 723, in sql
    return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
  File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 111, in deco
    return f(*a, **kw)
  File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
```
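For context on the trace: my read is that this is a classpath conflict rather than a Hudi problem per se. The `aws-java-sdk-bundle-1.12.31.jar` added through `spark.jars` supplies `com.amazonaws.transform.JsonUnmarshallerContext`, while the EMR Glue catalog client (`AWSCatalogMetastoreClient`) appears to have been built against EMR's own SDK build, whose `getCurrentToken()` returns the shaded `com/amazonaws/thirdparty/jackson/core/JsonToken` seen in the error signature. A diagnostic sketch (assuming a live PySpark session) to check which jar actually serves that class:

```python
# Diagnostic sketch: ask the JVM which jar the conflicting class was loaded
# from. If this prints the aws-java-sdk-bundle-1.12.31.jar path, that bundle
# is shadowing the SDK build the Glue metastore client expects.
jvm = spark.sparkContext._jvm
cls = jvm.java.lang.Class.forName("com.amazonaws.transform.JsonUnmarshallerContext")
src = cls.getProtectionDomain().getCodeSource()  # may be None for boot classes
print(src.getLocation().toString() if src is not None else "bootstrap classpath")
```

If it does point at the 1.12.31 bundle, removing that jar from `spark.jars` (EMR 6.5 already ships its own AWS SDK) would be the first thing I'd try.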