Rajesh Balamohan created HIVE-27354:
---------------------------------------
Summary: Iceberg listing all files during commit can cause delays
in large tables
Key: HIVE-27354
URL: https://issues.apache.org/jira/browse/HIVE-27354
Project: Hive
Issue Type: Improvement
Reporter: Rajesh Balamohan
When committing a table via create table, Iceberg invokes HMS APIs. The HMS
create-table path internally lists all files in the table folder in order to
update stats. This listing is not needed for Iceberg tables.
The following is the stack trace for later reference.
{noformat}
at
org.apache.hadoop.hive.common.FileUtils.listStatusRecursively(FileUtils.java:329)
at
org.apache.hadoop.hive.common.FileUtils.listStatusRecursively(FileUtils.java:330)
at
org.apache.hadoop.hive.common.FileUtils.listStatusRecursively(FileUtils.java:330)
at
org.apache.hadoop.hive.common.HiveStatsUtils.getFileStatusRecurse(HiveStatsUtils.java:61)
at
org.apache.hadoop.hive.metastore.Warehouse.getFileStatusesForUnpartitionedTable(Warehouse.java:581)
at
org.apache.hadoop.hive.metastore.MetaStoreUtils.updateTableStatsFast(MetaStoreUtils.java:201)
at
org.apache.hadoop.hive.metastore.MetaStoreUtils.updateTableStatsFast(MetaStoreUtils.java:194)
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_core(HiveMetaStore.java:1445)
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_with_environment_context(HiveMetaStore.java:1502)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:148)
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
at com.sun.proxy.$Proxy69.create_table_with_environment_context(Unknown
Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.create_table_with_environment_context(HiveMetaStoreClient.java:2419)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:755)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:743)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
...
...
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
at com.sun.proxy.$Proxy70.createTable(Unknown Source)
at
org.apache.iceberg.hive.HiveTableOperations.lambda$persistTable$4(HiveTableOperations.java:405)
at
org.apache.iceberg.hive.HiveTableOperations$$Lambda$4533/374509974.run(Unknown
Source)
at org.apache.iceberg.ClientPoolImpl.run(ClientPoolImpl.java:58)
at org.apache.iceberg.ClientPoolImpl.run(ClientPoolImpl.java:51)
at
org.apache.iceberg.hive.CachedClientPool.run(CachedClientPool.java:82)
at
org.apache.iceberg.hive.HiveTableOperations.persistTable(HiveTableOperations.java:403)
at
org.apache.iceberg.hive.HiveTableOperations.doCommit(HiveTableOperations.java:327)
at
org.apache.iceberg.BaseMetastoreTableOperations.commit(BaseMetastoreTableOperations.java:135)
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)