kapjoshi-cisco commented on issue #5636:
URL: https://github.com/apache/hudi/issues/5636#issuecomment-1205808116
@xushiyan i was able to run and insert data to hudi using above stated
configuration with 0.11.1 but with condition of updating `structs` and `arrays`
types to `string` type. But if i dont change the type to `string` then i get
this error, please let me know what am I missing here. I tried following steps
from issue #5484 with no luck.
*config*:
```bash
commonConfig = {
'className': 'org.apache.hudi',
'hoodie.datasource.hive_sync.use_jdbc': 'false',
'hoodie.datasource.write.precombine.field': 'ModTime',
'hoodie.datasource.write.recordkey.field': 'Moid',
# 'hoodie.datasource.hive_sync.mode': 'hms', # tried with
and without sync.mode
# 'hoodie.meta.sync.client.tool.class':
'org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool',
'hoodie.table.name': 'intersight',
'hoodie.consistency.check.enabled': 'true',
'hoodie.datasource.hive_sync.database':
args['database_name'],
'hoodie.datasource.write.reconcile.schema': 'true',
'hoodie.datasource.hive_sync.table': 'intersight' +
prefix.replace("/", "_").lower(),
'hoodie.datasource.hive_sync.enable': 'true', 'path':
's3://' + args['curated_bucket'] + '/merged/app' + prefix
}
```
*Error*:
```bash
2022-08-04 21:45:23,099 ERROR [main] glue.ProcessLauncher
(Logging.scala:logError(73)): Error from Python:Traceback (most recent call
last):
File "/tmp/second-delete-upsert.py", line 236, in <module>
startMerging(df_prefix_map_list)
File "/tmp/second-delete-upsert.py", line 171, in startMerging
.mode('append') \
File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py",
line 1107, in save
self._jwrite.save()
File
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line
1305, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line
111, in deco
return f(*a, **kw)
File "/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py",
line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o145.save.
: org.apache.hudi.exception.HoodieException: Could not sync using the meta
sync class org.apache.hudi.hive.HiveSyncTool
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:58)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$2(HoodieSparkSqlWriter.scala:648)
at
org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$2$adapted(HoodieSparkSqlWriter.scala:647)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:647)
at
org.apache.hudi.HoodieSparkSqlWriter$.bulkInsertAsRow(HoodieSparkSqlWriter.scala:592)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:178)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:184)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:185)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:223)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:220)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:181)
at
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:134)
at
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:133)
at
org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:301)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.hudi.exception.HoodieException: Got runtime exception
when hive syncing intersight_asset_deviceregistrations
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:143)
at
org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:56)
... 45 more
Caused by: org.apache.hudi.hive.HoodieHiveSyncException: Could not convert
field Type from STRING to
ARRAY<STRUCT<ClassId:string,Moid:string,ObjectType:string,link:string>> for
field permissionresources
at
org.apache.hudi.hive.util.HiveSchemaUtil.getSchemaDifference(HiveSchemaUtil.java:109)
at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:284)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:218)
at org.apache.hudi.hive.HiveSyncTool.doSync(HiveSyncTool.java:152)
at
org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:140)
... 46 more
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]