[
https://issues.apache.org/jira/browse/HIVE-24790?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Swaminathan Balachandran reassigned HIVE-24790:
-----------------------------------------------
Assignee: (was: Swaminathan Balachandran)
> Batch column stats updates to HMS
> ---------------------------------
>
> Key: HIVE-24790
> URL: https://issues.apache.org/jira/browse/HIVE-24790
> Project: Hive
> Issue Type: Improvement
> Components: HiveServer2
> Reporter: Rajesh Balamohan
> Priority: Major
>
> When a large number of partitions are inserted/updated, it would be good to
> batch column statistics updates to HMS.
> Currently, HS2 ends up throwing a read timeout exception when updating HMS.
> https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java#L180
> {noformat}
> Caused by: java.net.SocketTimeoutException: Read timed out
> at java.net.SocketInputStream.socketRead0(Native Method)
> ~[?:1.8.0_252]
> at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
> ~[?:1.8.0_252]
> at java.net.SocketInputStream.read(SocketInputStream.java:171)
> ~[?:1.8.0_252]
> at java.net.SocketInputStream.read(SocketInputStream.java:141)
> ~[?:1.8.0_252]
> at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
> ~[?:1.8.0_252]
> at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
> ~[?:1.8.0_252]
> at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
> ~[?:1.8.0_252]
> at
> org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127)
> ~[hive-exec-3.1]
> at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.transport.TSaslTransport.readLength(TSaslTransport.java:374)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.transport.TSaslTransport.readFrame(TSaslTransport.java:451)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:433)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:37)
> ~[hive-exec-3.1]
> at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:62)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219)
> ~[hive-exec-3.1]
> at
> org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_set_aggr_stats_for(ThriftHiveMetastore.java:4561)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.set_aggr_stats_for(ThriftHiveMetastore.java:4548)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.setPartitionColumnStatistics(HiveMetaStoreClient.java:2496)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.setPartitionColumnStatistics(SessionHiveMetaStoreClient.java:515)
> ~[hive-exec-3.1]
> at sun.reflect.GeneratedMethodAccessor194.invoke(Unknown Source)
> ~[?:?]
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_252]
> at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_252]
> at
> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:212)
> ~[hive-exec-3.1]
> at com.sun.proxy.$Proxy60.setPartitionColumnStatistics(Unknown
> Source) ~[?:?]
> at sun.reflect.GeneratedMethodAccessor194.invoke(Unknown Source)
> ~[?:?]
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_252]
> at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_252]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:3431)
> ~[hive-exec-3.1]
> at com.sun.proxy.$Proxy60.setPartitionColumnStatistics(Unknown
> Source) ~[?:?]
> at
> org.apache.hadoop.hive.ql.metadata.Hive.setPartitionColumnStatistics(Hive.java:5213)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.ql.stats.ColStatsProcessor.persistColumnStats(ColStatsProcessor.java:192)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.ql.stats.ColStatsProcessor.process(ColStatsProcessor.java:87)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:107)
> ~[hive-exec-3.1]
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> ~[hive-exec-3.1]
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> ~[hive-exec-3.1]
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
> ~[hive-exec-3.1]
> {noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)