[
https://issues.apache.org/jira/browse/HIVE-21853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
John LeBrun updated HIVE-21853:
-------------------------------
Description:
The getWritableSize(ObjectInspector oi, Object value) method in the
org.apache.hadoop.hive.ql.stats.StatsUtils class fails with an NPE when the 2nd
parameter (Object value) is null.
Attached is a patch with a unit test and fix (HIVE-21853.patch).
The issue was originally found when running a UDF query against a Hortonworks
cluster with HDP 3.1 running Hive 3.1.0. The issue occurs when executing the UDF
against a cluster using the Tez execution engine.
beeline hive configurations
set hive.execution.engine=tez;
set hive.fetch.task.conversion=none;
Attached is sample code with an implementation of a simple UDF that duplicates
the behavior (HIVE21853.java).
Steps to reproduce
(on a Hortonworks cluster with HDP 3.1 deployed):
- Start a beeline Hive session.
- Set the Hive configurations listed above.
- Add the jar containing the UDF from the sample code.
- Create a table containing one string column:
create table tmptable(col1 string)
insert into table tmptable values ('somestring')
-create function bugUdf as 'BugUDF';
-select bugUdf from tmptable;
This will result in a null pointer exception similar to this:
ql.Driver ()) - FAILED: NullPointerException null
java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getWritableSize(StatsUtils.java:1373)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfStruct(StatsUtils.java:1356)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfComplexTypes(StatsUtils.java:1212)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getAvgColLenOf(StatsUtils.java:1140)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExpression(StatsUtils.java:1584)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExprMap(StatsUtils.java:1424)
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$SelectStatsRule.process(StatsRulesProcFactory.java:196)
at
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at
org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
at
org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:397)
at
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:161)
at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:148)
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12443)
at
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:664)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1863)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1810)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1805)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
at
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
at
org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:262)
at org.apache.hive.service.cli.operation.Operation.run(Operation.java:247)
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
at
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
at
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
at
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
was:
getWritableSize(ObjectInspector oi, Object value) method in
org.apache.hadoop.hive.ql.stats.StatsUtils class fails with NPE when 2nd
parameter (Object value) is null.
Attached is patch with unit test and fix
Issue was originally found when running UDF query against Hortonworks cluster
with HDP 3.1 running Hive 3.1.0. The issue occurs when executing the UDF
against a cluster using the tez execution engine
beeline hive configurations
set hive.execution.engine=tez;
set hive.fetch.task.conversion=none;
Attached is sample code with an implementation of a simple UDF that duplicates
the behavior.
steps to reproduce
on a Hortonworks cluster with HDP 3.1 deployed
-start beeline Hive session
-set above hive configurations
-add jar containing UDF from sample code
-create table containing one string column
create table tmptable(col1 string)
insert into table tmptable values ('somestring')
-create function bugUdf as 'BugUDF';
-select bugUdf from tmptable;
this will result in a null pointer exception similar to this
ql.Driver ()) - FAILED: NullPointerException nulljava.lang.NullPointerException
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getWritableSize(StatsUtils.java:1373)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfStruct(StatsUtils.java:1356)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfComplexTypes(StatsUtils.java:1212)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getAvgColLenOf(StatsUtils.java:1140)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExpression(StatsUtils.java:1584)
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExprMap(StatsUtils.java:1424)
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$SelectStatsRule.process(StatsRulesProcFactory.java:196)
at
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at
org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
at
org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:397)
at
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:161)
at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:148)
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12443)
at
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:664)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1863)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1810)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1805)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
at
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
at
org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:262)
at org.apache.hive.service.cli.operation.Operation.run(Operation.java:247)
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
at
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
at
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
at
org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
at
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
> NPE in StatsUtils.getWritableSize() when value passed in is null
> ----------------------------------------------------------------
>
> Key: HIVE-21853
> URL: https://issues.apache.org/jira/browse/HIVE-21853
> Project: Hive
> Issue Type: Bug
> Environment: Hortonworks
> * Ambari version 2.7.3.0
> * HDP stack version 3.1
> * HDP stack repo version 3.1.0.0
> * stack vdf version 3.1.0.0-78
> Reporter: John LeBrun
> Priority: Major
> Attachments: HIVE-21853.patch, HIVE21853.java
>
>
> The getWritableSize(ObjectInspector oi, Object value) method in the
> org.apache.hadoop.hive.ql.stats.StatsUtils class fails with an NPE when the
> 2nd parameter (Object value) is null.
> Attached is a patch with a unit test and fix (HIVE-21853.patch).
> The issue was originally found when running a UDF query against a Hortonworks
> cluster with HDP 3.1 running Hive 3.1.0. The issue occurs when executing the
> UDF against a cluster using the Tez execution engine.
> beeline hive configurations
> set hive.execution.engine=tez;
> set hive.fetch.task.conversion=none;
> Attached is sample code with an implementation of a simple UDF that
> duplicates the behavior (HIVE21853.java).
> steps to reproduce
> on a Hortonworks cluster with HDP 3.1 deployed
> -start beeline Hive session
> -set above hive configurations
> -add jar containing UDF from sample code
> -create table containing one string column
> create table tmptable(col1 string)
> insert into table tmptable values ('somestring')
> -create function bugUdf as 'BugUDF';
> -select bugUdf from tmptable;
> This will result in a null pointer exception similar to this:
> ql.Driver ()) - FAILED: NullPointerException null
> java.lang.NullPointerException
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getWritableSize(StatsUtils.java:1373)
>
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfStruct(StatsUtils.java:1356)
>
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfComplexTypes(StatsUtils.java:1212)
>
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getAvgColLenOf(StatsUtils.java:1140)
>
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExpression(StatsUtils.java:1584)
>
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExprMap(StatsUtils.java:1424)
>
> at
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$SelectStatsRule.process(StatsRulesProcFactory.java:196)
>
> at
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
>
> at
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
>
> at
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
>
> at
> org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
>
> at
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
>
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:397)
>
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:161)
>
> at
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:148)
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12443)
>
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
>
> at
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
>
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:664)
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1863)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1810)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1805)
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
>
> at
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
>
> at
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:262)
>
> at org.apache.hive.service.cli.operation.Operation.run(Operation.java:247)
> at
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
>
> at
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
>
> at
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
>
> at
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
>
> at
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
>
> at
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
>
> at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
> at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
> at
> org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>
> at
> org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>
> at java.lang.Thread.run(Thread.java:748)
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)