gfn9cho opened a new issue #954:  
org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: 
<hivedb.tableName> table not found
URL: https://github.com/apache/incubator-hudi/issues/954
 
 
   I am using hudi-spark-bundle-0.5.1-SNAPSHOT.jar in EMR and getting the below 
exception in hiveSync.
   We are using AWS glue catalog for hive metastore.
   The Hive table is getting created: I can see the table in Hive, but it 
contains no data.
   org.apache.hudi.hive.HoodieHiveSyncException: Failed to sync partitions for 
table <tableName>
     at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:172)
     at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:107)
     at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:67)
     at 
org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:235)
     at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169)
     at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91)
     at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
     at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
     at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
     at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
     at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
     at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
     at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
     at 
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
     at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
     at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
     at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
     at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
     at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
     at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
     at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
     at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
     at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
     at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)
     ... 69 elided
   Caused by: 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: 
<hiveDB>.<tableName> table not found
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
     at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:2377)
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:2362)
     at 
org.apache.hudi.org.apache.hadoop_hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1162)
     at 
org.apache.hudi.hive.HoodieHiveClient.scanTablePartitions(HoodieHiveClient.java:240)
     at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:162)
     ... 95 more
   
   Below is the code:
   spark-shell --master yarn --deploy-mode client  --conf 
spark.shuffle.spill=true \
    --conf spark.scheduler.mode=FIFO \
    --conf spark.executor.extraJavaOptions=-XX:MaxPermSize=1024m \
    --conf spark.sql.planner.externalSort=true --conf 
spark.shuffle.manager=sort \
    --conf spark.ui.port=8088 --conf spark.executor.memoryOverhead=2g  \
    --conf spark.rpc.message.maxSize=1024 --conf spark.file.transferTo=false \
    --conf spark.driver.maxResultSize=3g --conf spark.rdd.compress=true \
    --conf 
spark.executor.extraJavaOptions="-Dconfig.resource=spark-defaults.conf" \
    --conf 
spark.driver.JavaOptions="-Dspark.yarn.app.container.log.dir=/mnt/var/log/hadoop"
 \
    --conf spark.driver.extraJavaOptions="-Dconfig.file=spark-defaults.conf" \
    --conf spark.sql.parquet.writeLegacyFormat=true \
    --conf spark.enable.dynamicAllocation=true \
    --conf spark.dynamicAllocation.maxExecutors=10 \
    --conf spark.dynamicAllocation.minExecutors=1 \
    --conf spark.executor.cores=5 \
    --conf spark.executor.memory=3g --conf spark.driver.memory=2g  \
    --conf spark.executor.instances=4 --conf 
spark.serializer=org.apache.spark.serializer.KryoSerializer  \
    --name gwpl_staging_load_hudi \
    --files /etc/spark/conf/hive-site.xml \
    --properties-file /usr/lib/spark/conf/spark-defaults.conf \
    --jars /home/hadoop/hudi/hudi-spark-bundle-0.5.1-SNAPSHOT.jar 
   
   import org.apache.hudi.DataSourceWriteOptions
   import org.apache.hudi.config.HoodieWriteConfig
   import org.apache.spark.sql._
   import org.apache.spark.sql.SaveMode._
   import org.apache.spark.sql.expressions.Window
   import org.apache.spark.sql.functions._
   import org.joda.time.format.DateTimeFormat
   
   val stagePrefix="stg_gwpl"
   val harmonizedStageDB="uat_edf_staging"
   val harmonizedstagePath="s3://sa-l3-uat-emr-edl-processed/staging"
   val table="pc_policy"
   
   val incrementalData=spark.sql("select * from 
uat_connect_gwpl_data_processed.pc_policy limit 100").cache
   
   incrementalData.write.
   format("org.apache.hudi").
   option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY,"ID").
   option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "ingestiondt").
   option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "UpdateTime").
   option(HoodieWriteConfig.TABLE_NAME, stagePrefix + "_hudi_" + table).
   option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, 
"jdbc:hive2:hiveserver:10000").
   option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
   option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
   option("hoodie.datasource.hive_sync.enable", true).
   option("hoodie.datasource.hive_sync.database",harmonizedStageDB).
   option("hoodie.datasource.hive_sync.table",stagePrefix + "_hudi_" + table).
   option("hoodie.datasource.hive_sync.partition_fields","ingestiondt").
   mode(SaveMode.Overwrite).
   save(s"${harmonizedstagePath}/hudi/$table")
   
   Please let me know if I can provide any more details.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to