[
https://issues.apache.org/jira/browse/HUDI-9141?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Shawn Chang updated HUDI-9141:
------------------------------
Description:
The database name can be null or empty; in either case it should fall back to the
default value `default`. Handle this corner case in `HoodieFileIndex`.
Example of an issue this can cause:
```
Caused by: org.apache.spark.sql.AnalysisException:
org.apache.hadoop.hive.ql.metadata.HiveException: Unable to fetch table
basic_hudi_test_cow_glue_enabled_d7f80d5c_37ef_4c0a_a2bf_f2777b080a93. dbName
cannot be null or empty
at
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:131)
at
org.apache.spark.sql.hive.HiveExternalCatalog.tableExists(HiveExternalCatalog.scala:917)
at
org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.tableExists(ExternalCatalogWithListener.scala:146)
at
org.apache.spark.sql.catalyst.catalog.SessionCatalog.tableExists(SessionCatalog.scala:524)
at org.apache.spark.sql.internal.CatalogImpl.tableExists(CatalogImpl.scala:536)
at
org.apache.hudi.HoodieFileIndex$.getConfigProperties(HoodieFileIndex.scala:542)
at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:97)
at
org.apache.hudi.HoodieMergeOnReadSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:267)
at
org.apache.hudi.HoodieCopyOnWriteSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:355)
at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:346)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:158)
at
org.apache.spark.sql.execution.datasources.DataSource.$anonfun$resolveRelation$6(DataSource.scala:366)
at
org.apache.spark.util.FileAccessContext$.withContext(FileAccessContext.scala:41)
at
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:363)
at
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:345)
at
org.apache.spark.sql.execution.datasources.FindDataSourceTable$$anon$1.call(DataSourceStrategy.scala:268)
at
org.apache.spark.sql.execution.datasources.FindDataSourceTable$$anon$1.call(DataSourceStrategy.scala:255)
at
org.sparkproject.guava.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4792)
at
org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
at
org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
... 146 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to fetch
table basic_hudi_test_cow_glue_enabled_d7f80d5c_37ef_4c0a_a2bf_f2777b080a93.
dbName cannot be null or empty
at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1307)
at org.apache.spark.sql.hive.client.Shim_v0_12.getTable(HiveShim.scala:628)
at
org.apache.spark.sql.hive.client.HiveClientImpl.getRawTableOption(HiveClientImpl.scala:414)
at
org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$tableExists$1(HiveClientImpl.scala:429)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at
org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:303)
at
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:234)
at
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:233)
at
org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:283)
at
org.apache.spark.sql.hive.client.HiveClientImpl.tableExists(HiveClientImpl.scala:429)
at
org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$tableExists$1(HiveExternalCatalog.scala:917)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:102)
... 165 more
Caused by: java.lang.IllegalArgumentException: dbName cannot be null or empty
at com.google.common.base.Preconditions.checkArgument(Preconditions.java:92)
at
com.amazonaws.glue.catalog.metastore.GlueMetastoreClientDelegate.getTable(GlueMetastoreClientDelegate.java:556)
at
com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient.getTable(AWSCatalogMetastoreClient.java:1104)
at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1299)
... 177 more
```
> Hudi Spark should fall back to use default as database name if not set
> ----------------------------------------------------------------------
>
> Key: HUDI-9141
> URL: https://issues.apache.org/jira/browse/HUDI-9141
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Shawn Chang
> Priority: Major
>
> The database name can be null or empty; in either case it should fall back to the
> default value `default`. Handle this corner case in `HoodieFileIndex`.
>
> Example of an issue this can cause:
> ```
> Caused by: org.apache.spark.sql.AnalysisException:
> org.apache.hadoop.hive.ql.metadata.HiveException: Unable to fetch table
> basic_hudi_test_cow_glue_enabled_d7f80d5c_37ef_4c0a_a2bf_f2777b080a93. dbName
> cannot be null or empty
> at
> org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:131)
> at
> org.apache.spark.sql.hive.HiveExternalCatalog.tableExists(HiveExternalCatalog.scala:917)
> at
> org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.tableExists(ExternalCatalogWithListener.scala:146)
> at
> org.apache.spark.sql.catalyst.catalog.SessionCatalog.tableExists(SessionCatalog.scala:524)
> at
> org.apache.spark.sql.internal.CatalogImpl.tableExists(CatalogImpl.scala:536)
> at
> org.apache.hudi.HoodieFileIndex$.getConfigProperties(HoodieFileIndex.scala:542)
> at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:97)
> at
> org.apache.hudi.HoodieMergeOnReadSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:267)
> at
> org.apache.hudi.HoodieCopyOnWriteSnapshotHadoopFsRelationFactory.<init>(HoodieHadoopFsRelationFactory.scala:355)
> at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:346)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:158)
> at
> org.apache.spark.sql.execution.datasources.DataSource.$anonfun$resolveRelation$6(DataSource.scala:366)
> at
> org.apache.spark.util.FileAccessContext$.withContext(FileAccessContext.scala:41)
> at
> org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:363)
> at
> org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:345)
> at
> org.apache.spark.sql.execution.datasources.FindDataSourceTable$$anon$1.call(DataSourceStrategy.scala:268)
> at
> org.apache.spark.sql.execution.datasources.FindDataSourceTable$$anon$1.call(DataSourceStrategy.scala:255)
> at
> org.sparkproject.guava.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4792)
> at
> org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> at
> org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> ... 146 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to fetch
> table basic_hudi_test_cow_glue_enabled_d7f80d5c_37ef_4c0a_a2bf_f2777b080a93.
> dbName cannot be null or empty
> at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1307)
> at org.apache.spark.sql.hive.client.Shim_v0_12.getTable(HiveShim.scala:628)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.getRawTableOption(HiveClientImpl.scala:414)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$tableExists$1(HiveClientImpl.scala:429)
> at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:303)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:234)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:233)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:283)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.tableExists(HiveClientImpl.scala:429)
> at
> org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$tableExists$1(HiveExternalCatalog.scala:917)
> at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
> at
> org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:102)
> ... 165 more
> Caused by: java.lang.IllegalArgumentException: dbName cannot be null or empty
> at com.google.common.base.Preconditions.checkArgument(Preconditions.java:92)
> at
> com.amazonaws.glue.catalog.metastore.GlueMetastoreClientDelegate.getTable(GlueMetastoreClientDelegate.java:556)
> at
> com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient.getTable(AWSCatalogMetastoreClient.java:1104)
> at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1299)
> ... 177 more
> ```
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)