In my experience, having looked at way too many heap dumps from
hiveserver2, it always ends up being a seriously over-partitioned table
and a user who decided to do a full table scan, basically requesting all
partitions. This is often by accident: for example, when using
unix_timestamp to convert dates you may not realize it's not flagged as
deterministic, so the filter cannot be used for partition pruning and,
as a consequence, you accidentally do a full table scan.
Sometimes it's not so accidental.
If the 650M is the size of the file on disk, you might just be looking at
compression at work. Our hprof files are often significantly smaller than
the memory they actually occupy.
If you are hit by this, consider using strict mode
(hive.mapred.mode=strict). It's annoying, but it also makes these
problems more visible.
On 10/10/15 16:09, Sanjeev Verma wrote:
Even with enough heap, my hiveserver2 is going out of memory. I enabled
heap dump on error, which produces a 650MB heap dump although I have
hiveserver2 configured with an 8GB heap.
Here is the stack trace of the thread which went into OOM. Could anybody let
me know why it is throwing OOM?
"pool-2-thread-4" prio=5 tid=40 RUNNABLE
at java.lang.OutOfMemoryError.<init>(OutOfMemoryError.java:48)
at java.lang.StringCoding$StringDecoder.decode(StringCoding.java:149)
Local Variable: byte[]#1567
Local Variable: java.lang.StringCoding$StringDecoder#1
at java.lang.StringCoding.decode(StringCoding.java:193)
at java.lang.String.<init>(String.java:416)
at java.lang.String.<init>(String.java:481)
at
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:355)
at
org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:347)
at
org.apache.hadoop.hive.metastore.api.FieldSchema$FieldSchemaStandardScheme.read(FieldSchema.java:490)
at
org.apache.hadoop.hive.metastore.api.FieldSchema$FieldSchemaStandardScheme.read(FieldSchema.java:476)
at
org.apache.hadoop.hive.metastore.api.FieldSchema.read(FieldSchema.java:410)
at
org.apache.hadoop.hive.metastore.api.StorageDescriptor$StorageDescriptorStandardScheme.read(StorageDescriptor.java:1309)
Local Variable:
org.apache.hadoop.hive.metastore.api.StorageDescriptor#8459
Local Variable: org.apache.hadoop.hive.metastore.api.FieldSchema#276777
at
org.apache.hadoop.hive.metastore.api.StorageDescriptor$StorageDescriptorStandardScheme.read(StorageDescriptor.java:1288)
at
org.apache.hadoop.hive.metastore.api.StorageDescriptor.read(StorageDescriptor.java:1150)
at
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:994)
at
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:929)
at org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:821)
at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java:56468)
Local Variable: org.apache.hadoop.hive.metastore.api.Partition#8450
at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java:56447)
Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme#1
at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java:56381)
Local Variable: org.apache.thrift.protocol.TBinaryProtocol#10
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78)
Local Variable: java.lang.String#802229
Local Variable: org.apache.thrift.protocol.TMessage#2
at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:1751)
Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result#1
at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:1736)
Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client#8
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:880)
at sun.reflect.GeneratedMethodAccessor36.invoke(<unknown string>)
Local Variable: sun.reflect.GeneratedMethodAccessor36#1
Local Variable: org.apache.hadoop.hive.metastore.HiveMetaStoreClient#8
Local Variable: java.lang.Short#129
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89)
Local Variable:
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient#8
Local Variable: java.lang.reflect.Method#397
Local Variable: java.lang.Object[]#24405
at com.sun.proxy.$Proxy10.listPartitions(<unknown string>)
Local Variable: com.sun.proxy.$Proxy10#8
Local Variable: java.lang.String#674524
at
org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:1785)
at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getAllPartitions(PartitionPruner.java:344)
at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getPartitionsFromServer(PartitionPruner.java:286)
at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:180)
Local Variable: java.util.HashMap#38215
Local Variable: java.lang.String#674746
at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:140)
at
org.apache.hadoop.hive.ql.parse.ParseContext.getPrunedPartitions(ParseContext.java:619)
Local Variable: java.lang.String#674523
at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.checkSortGroupBy(GroupByOptimizer.java:393)
Local Variable: org.apache.hadoop.hive.ql.exec.TableScanOperator#8
Local Variable: org.apache.hadoop.hive.ql.metadata.Table#8
Local Variable: java.util.HashMap#38293
Local Variable: java.util.HashSet#8737
Local Variable: java.util.ArrayList#53457
Local Variable: java.util.ArrayList$Itr#3
at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.processGroupBy(GroupByOptimizer.java:175)
at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.process(GroupByOptimizer.java:267)
Local Variable: org.apache.hadoop.hive.ql.exec.GroupByOperator#15
Local Variable:
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$GroupByOptimizerContext#1
Local Variable:
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor#1
at
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:94)
Local Variable: java.lang.Object[]#24402
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:78)
Local Variable: java.util.Stack#1
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:132)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:109)
Local Variable: org.apache.hadoop.hive.ql.exec.GroupByOperator#16
at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer.transform(GroupByOptimizer.java:113)
Local Variable: org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher#1
Local Variable: org.apache.hadoop.hive.ql.lib.DefaultGraphWalker#1
Local Variable: java.util.ArrayList#53456
Local Variable: java.util.LinkedHashMap#8760
at
org.apache.hadoop.hive.ql.optimizer.Optimizer.optimize(Optimizer.java:146)
Local Variable: java.util.ArrayList$Itr#2
Local Variable: org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer#1
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:9348)
Local Variable:
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer$Phase1Ctx#1
Local Variable: org.apache.hadoop.hive.ql.exec.FileSinkOperator#8
Local Variable: org.apache.hadoop.hive.ql.optimizer.Optimizer#1
Local Variable: org.apache.hadoop.hive.ql.parse.ParseContext#1
at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
Local Variable: org.apache.hadoop.hive.ql.Context#8
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:426)
Local Variable: org.apache.hadoop.hive.ql.parse.ASTNode#12
Local Variable: org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#1
Local Variable: org.apache.hadoop.hive.ql.log.PerfLogger#9
Local Variable: org.apache.hadoop.hive.ql.Driver$QueryState#1
Local Variable: java.util.ArrayList#53358
Local Variable:
org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl#1
Local Variable: org.apache.hadoop.hive.ql.parse.ParseDriver#8
Local Variable: java.util.ArrayList$Itr#1
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:324)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:980)
Local Variable: java.lang.Object#3307
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:973)
Local Variable: org.apache.hadoop.hive.ql.Driver#8
at
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:99)
at
org.apache.hive.service.cli.operation.SQLOperation.run(SQLOperation.java:172)
Local Variable: org.apache.hadoop.hive.conf.HiveConf#30
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:244)
Local Variable: org.apache.hive.service.cli.OperationHandle#8
Local Variable: org.apache.hive.service.cli.operation.SQLOperation#8
Local Variable: org.apache.hive.service.cli.operation.OperationManager#1
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:228)
at sun.reflect.GeneratedMethodAccessor26.invoke(<unknown string>)
Local Variable: sun.reflect.GeneratedMethodAccessor26#1
Local Variable:
org.apache.hive.service.cli.session.HiveSessionImplwithUGI#8
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:79)
at
org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:37)
at
org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:64)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
Local Variable: java.security.AccessControlContext#26
Local Variable: javax.security.auth.Subject#16
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1566)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure.doAs(HadoopShimsSecure.java:500)
Local Variable: org.apache.hive.service.cli.session.HiveSessionProxy$1#1
Local Variable: org.apache.hadoop.hive.shims.Hadoop23Shims#1
Local Variable: org.apache.hadoop.security.UserGroupInformation#87
at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:60)
Local Variable: java.lang.reflect.Method#431
Local Variable: java.lang.Object[]#24298
Local Variable: org.apache.hive.service.cli.session.HiveSessionProxy#8
at com.sun.proxy.$Proxy11.executeStatementAsync(<unknown string>)
Local Variable: com.sun.proxy.$Proxy11#8
at
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:239)
Local Variable: org.apache.hive.service.cli.CLIService#1
at
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:358)
Local Variable: org.apache.hive.service.cli.SessionHandle#9
Local Variable: java.util.HashMap#38189
Local Variable:
org.apache.hive.service.cli.thrift.TExecuteStatementResp#1
Local Variable: java.lang.String#674503
Local Variable:
org.apache.hive.service.cli.thrift.TExecuteStatementReq#1
Local Variable: java.lang.Boolean#1
at
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1373)
Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$ExecuteStatement_result#1
at
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1358)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$ExecuteStatement_args#1
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement#1
Local Variable: org.apache.thrift.protocol.TMessage#1
at
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:55)
at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:206)
Local Variable: org.apache.hive.service.auth.TSetIpAddressProcessor#1
Local Variable:
org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$HiveSaslServerTransportFactory$1#1
Local Variable: org.apache.thrift.protocol.TBinaryProtocol#8
Local Variable: org.apache.thrift.protocol.TBinaryProtocol#9
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
Local Variable:
org.apache.thrift.server.TThreadPoolServer$WorkerProcess#1
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
Local Variable: java.util.concurrent.ThreadPoolExecutor$Worker#5
at java.lang.Thread.run(Thread.java:744)