In my experience, having looked at way too many heap dumps from hiveserver2, it always ends up being a seriously over-partitioned table and a user who decided to do a full table scan, basically requesting all partitions. This is often by accident: for example, when using unix_timestamp to convert dates, you don't realize it's not flagged as deterministic, and as a consequence you accidentally do a full table scan. Sometimes it is not so accidental. If the 650M is the size of the file on disk, you might just be looking at compression at work; our hprof files are often significantly smaller than the memory they actually occupy. If you are hit by this, consider using strict mode. It's annoying, but it also makes these problems more visible.

On 10/10/15 16:09, Sanjeev Verma wrote:
Even with enough heap size, my hiveserver2 is going out of memory. I enabled
heap dump on error, which is producing a 650MB heap dump, although I have
hiveserver2 configured with an 8GB heap.

Here is the stack trace of the thread which went into OOM. Could anybody let
me know why it is throwing OOM?

"pool-2-thread-4" prio=5 tid=40 RUNNABLE
  at java.lang.OutOfMemoryError.<init>(OutOfMemoryError.java:48)
  at java.lang.StringCoding$StringDecoder.decode(StringCoding.java:149)
     Local Variable: byte[]#1567
     Local Variable: java.lang.StringCoding$StringDecoder#1
  at java.lang.StringCoding.decode(StringCoding.java:193)
  at java.lang.String.<init>(String.java:416)
  at java.lang.String.<init>(String.java:481)
  at
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:355)
  at
org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:347)
  at
org.apache.hadoop.hive.metastore.api.FieldSchema$FieldSchemaStandardScheme.read(FieldSchema.java:490)
  at
org.apache.hadoop.hive.metastore.api.FieldSchema$FieldSchemaStandardScheme.read(FieldSchema.java:476)
  at
org.apache.hadoop.hive.metastore.api.FieldSchema.read(FieldSchema.java:410)
  at
org.apache.hadoop.hive.metastore.api.StorageDescriptor$StorageDescriptorStandardScheme.read(StorageDescriptor.java:1309)
     Local Variable:
org.apache.hadoop.hive.metastore.api.StorageDescriptor#8459
     Local Variable: org.apache.hadoop.hive.metastore.api.FieldSchema#276777
  at
org.apache.hadoop.hive.metastore.api.StorageDescriptor$StorageDescriptorStandardScheme.read(StorageDescriptor.java:1288)
  at
org.apache.hadoop.hive.metastore.api.StorageDescriptor.read(StorageDescriptor.java:1150)
  at
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:994)
  at
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:929)
  at org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:821)
  at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java:56468)
     Local Variable: org.apache.hadoop.hive.metastore.api.Partition#8450
  at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java:56447)
     Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme#1
  at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java:56381)
     Local Variable: org.apache.thrift.protocol.TBinaryProtocol#10
  at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78)
     Local Variable: java.lang.String#802229
     Local Variable: org.apache.thrift.protocol.TMessage#2
  at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:1751)
     Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result#1
  at
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:1736)
     Local Variable:
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client#8
  at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:880)
  at sun.reflect.GeneratedMethodAccessor36.invoke(<unknown string>)
     Local Variable: sun.reflect.GeneratedMethodAccessor36#1
     Local Variable: org.apache.hadoop.hive.metastore.HiveMetaStoreClient#8
     Local Variable: java.lang.Short#129
  at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89)
     Local Variable:
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient#8
     Local Variable: java.lang.reflect.Method#397
     Local Variable: java.lang.Object[]#24405
  at com.sun.proxy.$Proxy10.listPartitions(<unknown string>)
     Local Variable: com.sun.proxy.$Proxy10#8
     Local Variable: java.lang.String#674524
  at
org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:1785)
  at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getAllPartitions(PartitionPruner.java:344)
  at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getPartitionsFromServer(PartitionPruner.java:286)
  at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:180)
     Local Variable: java.util.HashMap#38215
     Local Variable: java.lang.String#674746
  at
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(PartitionPruner.java:140)
  at
org.apache.hadoop.hive.ql.parse.ParseContext.getPrunedPartitions(ParseContext.java:619)
     Local Variable: java.lang.String#674523
  at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.checkSortGroupBy(GroupByOptimizer.java:393)
     Local Variable: org.apache.hadoop.hive.ql.exec.TableScanOperator#8
     Local Variable: org.apache.hadoop.hive.ql.metadata.Table#8
     Local Variable: java.util.HashMap#38293
     Local Variable: java.util.HashSet#8737
     Local Variable: java.util.ArrayList#53457
     Local Variable: java.util.ArrayList$Itr#3
  at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.processGroupBy(GroupByOptimizer.java:175)
  at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor.process(GroupByOptimizer.java:267)
     Local Variable: org.apache.hadoop.hive.ql.exec.GroupByOperator#15
     Local Variable:
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$GroupByOptimizerContext#1
     Local Variable:
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer$SortGroupByProcessor#1
  at
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
  at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:94)
     Local Variable: java.lang.Object[]#24402
  at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:78)
     Local Variable: java.util.Stack#1
  at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:132)
  at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:109)
     Local Variable: org.apache.hadoop.hive.ql.exec.GroupByOperator#16
  at
org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer.transform(GroupByOptimizer.java:113)
     Local Variable: org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher#1
     Local Variable: org.apache.hadoop.hive.ql.lib.DefaultGraphWalker#1
     Local Variable: java.util.ArrayList#53456
     Local Variable: java.util.LinkedHashMap#8760
  at
org.apache.hadoop.hive.ql.optimizer.Optimizer.optimize(Optimizer.java:146)
     Local Variable: java.util.ArrayList$Itr#2
     Local Variable: org.apache.hadoop.hive.ql.optimizer.GroupByOptimizer#1
  at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:9348)
     Local Variable:
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer$Phase1Ctx#1
     Local Variable: org.apache.hadoop.hive.ql.exec.FileSinkOperator#8
     Local Variable: org.apache.hadoop.hive.ql.optimizer.Optimizer#1
     Local Variable: org.apache.hadoop.hive.ql.parse.ParseContext#1
  at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
     Local Variable: org.apache.hadoop.hive.ql.Context#8
  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:426)
     Local Variable: org.apache.hadoop.hive.ql.parse.ASTNode#12
     Local Variable: org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#1
     Local Variable: org.apache.hadoop.hive.ql.log.PerfLogger#9
     Local Variable: org.apache.hadoop.hive.ql.Driver$QueryState#1
     Local Variable: java.util.ArrayList#53358
     Local Variable:
org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl#1
     Local Variable: org.apache.hadoop.hive.ql.parse.ParseDriver#8
     Local Variable: java.util.ArrayList$Itr#1
  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:324)
  at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:980)
     Local Variable: java.lang.Object#3307
  at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:973)
     Local Variable: org.apache.hadoop.hive.ql.Driver#8
  at
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:99)
  at
org.apache.hive.service.cli.operation.SQLOperation.run(SQLOperation.java:172)
     Local Variable: org.apache.hadoop.hive.conf.HiveConf#30
  at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:244)
     Local Variable: org.apache.hive.service.cli.OperationHandle#8
     Local Variable: org.apache.hive.service.cli.operation.SQLOperation#8
     Local Variable: org.apache.hive.service.cli.operation.OperationManager#1
  at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:228)
  at sun.reflect.GeneratedMethodAccessor26.invoke(<unknown string>)
     Local Variable: sun.reflect.GeneratedMethodAccessor26#1
     Local Variable:
org.apache.hive.service.cli.session.HiveSessionImplwithUGI#8
  at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:79)
  at
org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:37)
  at
org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:64)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:415)
     Local Variable: java.security.AccessControlContext#26
     Local Variable: javax.security.auth.Subject#16
  at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1566)
  at
org.apache.hadoop.hive.shims.HadoopShimsSecure.doAs(HadoopShimsSecure.java:500)
     Local Variable: org.apache.hive.service.cli.session.HiveSessionProxy$1#1
     Local Variable: org.apache.hadoop.hive.shims.Hadoop23Shims#1
     Local Variable: org.apache.hadoop.security.UserGroupInformation#87
  at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:60)
     Local Variable: java.lang.reflect.Method#431
     Local Variable: java.lang.Object[]#24298
     Local Variable: org.apache.hive.service.cli.session.HiveSessionProxy#8
  at com.sun.proxy.$Proxy11.executeStatementAsync(<unknown string>)
     Local Variable: com.sun.proxy.$Proxy11#8
  at
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:239)
     Local Variable: org.apache.hive.service.cli.CLIService#1
  at
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:358)
     Local Variable: org.apache.hive.service.cli.SessionHandle#9
     Local Variable: java.util.HashMap#38189
     Local Variable:
org.apache.hive.service.cli.thrift.TExecuteStatementResp#1
     Local Variable: java.lang.String#674503
     Local Variable:
org.apache.hive.service.cli.thrift.TExecuteStatementReq#1
     Local Variable: java.lang.Boolean#1
  at
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1373)
     Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$ExecuteStatement_result#1
  at
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1358)
  at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
     Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$ExecuteStatement_args#1
  at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
     Local Variable:
org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement#1
     Local Variable: org.apache.thrift.protocol.TMessage#1
  at
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:55)
  at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:206)
     Local Variable: org.apache.hive.service.auth.TSetIpAddressProcessor#1
     Local Variable:
org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$HiveSaslServerTransportFactory$1#1
     Local Variable: org.apache.thrift.protocol.TBinaryProtocol#8
     Local Variable: org.apache.thrift.protocol.TBinaryProtocol#9
  at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
     Local Variable:
org.apache.thrift.server.TThreadPoolServer$WorkerProcess#1
  at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
     Local Variable: java.util.concurrent.ThreadPoolExecutor$Worker#5
  at java.lang.Thread.run(Thread.java:744)


Reply via email to