Rajesh Balamohan created HIVE-26686:
---------------------------------------
Summary: Iceberg: Having lot of snapshots impacts runtime due to
multiple loads of the table
Key: HIVE-26686
URL: https://issues.apache.org/jira/browse/HIVE-26686
Project: Hive
Issue Type: Improvement
Components: HiveServer2
Reporter: Rajesh Balamohan
When a large number of snapshots are present in the manifest file, it adversely
impacts the runtime of queries (e.g., with a 15-minute trickle feed).
Having more snapshots will slow down runtime in 2 additional places.
1. At the time of populating statistics, it tries to load the table details
again (i.e., a refresh-table invocation).
2. In the Hive metastore hook (HiveIcebergMetaHook::doPreAlterTable),
during pre-alter table.
We need to check whether the entire table information, along with snapshot
details, is needed for this.
{noformat}
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeArray(JsonNodeDeserializer.java:437)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:261)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4218)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3251)
at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:264)
at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:258)
at
org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$0(BaseMetastoreTableOperations.java:177)
at
org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$685/0x0000000840e1b440.apply(Unknown
Source)
at
org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$1(BaseMetastoreTableOperations.java:191)
at
org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$686/0x0000000840e1a840.run(Unknown
Source)
at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:404)
at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:214)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:198)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:190)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:191)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:176)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:171)
at
org.apache.iceberg.hive.HiveTableOperations.doRefresh(HiveTableOperations.java:153)
at
org.apache.iceberg.BaseMetastoreTableOperations.refresh(BaseMetastoreTableOperations.java:96)
at
org.apache.iceberg.BaseMetastoreTableOperations.current(BaseMetastoreTableOperations.java:79)
at
org.apache.iceberg.BaseMetastoreCatalog.loadTable(BaseMetastoreCatalog.java:44)
at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:116)
at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:106)
at
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.getBasicStatistics(HiveIcebergStorageHandler.java:309)
at
org.apache.hadoop.hive.ql.stats.BasicStatsTask$BasicStatsProcessor.<init>(BasicStatsTask.java:138)
at
org.apache.hadoop.hive.ql.stats.BasicStatsTask.aggregateStats(BasicStatsTask.java:301)
at
org.apache.hadoop.hive.ql.stats.BasicStatsTask.process(BasicStatsTask.java:108)
at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:107)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:806)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:540)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:534)
at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
at java.security.AccessController.doPrivileged(java.base/Native
Method)
at javax.security.auth.Subject.doAs(java.base/Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
at
java.util.concurrent.Executors$RunnableAdapter.call(java.base/Executors.java:515)
at
java.util.concurrent.FutureTask.run(java.base/FutureTask.java:264)
at
java.util.concurrent.Executors$RunnableAdapter.call(java.base/Executors.java:515)
{noformat}
{noformat}
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:258)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeArray(JsonNodeDeserializer.java:437)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:261)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4218)
at
org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3251)
at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:264)
at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:258)
at
org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$0(BaseMetastoreTableOperations.java:177)
at
org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$685/0x0000000840e1b440.apply(Unknown
Source)
at
org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$1(BaseMetastoreTableOperations.java:191)
at
org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$686/0x0000000840e1a840.run(Unknown
Source)
at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:404)
at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:214)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:198)
at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:190)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:191)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:176)
at
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:171)
at
org.apache.iceberg.hive.HiveTableOperations.doRefresh(HiveTableOperations.java:153)
at
org.apache.iceberg.BaseMetastoreTableOperations.refresh(BaseMetastoreTableOperations.java:96)
at
org.apache.iceberg.BaseMetastoreTableOperations.current(BaseMetastoreTableOperations.java:79)
at
org.apache.iceberg.BaseMetastoreCatalog.loadTable(BaseMetastoreCatalog.java:44)
at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:116)
at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:106)
at
org.apache.iceberg.mr.hive.IcebergTableUtil.lambda$getTable$1(IcebergTableUtil.java:99)
at
org.apache.iceberg.mr.hive.IcebergTableUtil$$Lambda$669/0x0000000840e1f840.apply(Unknown
Source)
at
org.apache.iceberg.mr.hive.IcebergTableUtil.getTable(IcebergTableUtil.java:105)
at
org.apache.iceberg.mr.hive.HiveIcebergMetaHook.doPreAlterTable(HiveIcebergMetaHook.java:323)
at
org.apache.iceberg.mr.hive.HiveIcebergMetaHook.preAlterTable(HiveIcebergMetaHook.java:313)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:514)
at
org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:508)
at jdk.internal.reflect.GeneratedMethodAccessor233.invoke(Unknown Source)
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)