[
https://issues.apache.org/jira/browse/HIVE-22633?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
zhangbutao updated HIVE-22633:
------------------------------
Description:
If hive.map.aggr and hive.groupby.skewindata are both set to true, an exception will be thrown.
Steps to reproduce:
1. Set the configuration, create the tables, and run the insert:
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
create table test1 (id1 bigint);
create table test2 (id2 bigint) partitioned by(dt2 string);
insert into test2 partition(dt2='2020') select a.id1 from test1 a group by
a.id1;
2. The task fails with a NullPointerException:
{code:java}
], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) :
attempt_1585641455670_0001_2_03_000000_2:java.lang.RuntimeException:
java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats$GenericUDAFNumericStatsEvaluator.init(GenericUDAFComputeStats.java:373)
at
org.apache.hadoop.hive.ql.exec.GroupByOperator.initializeOp(GroupByOperator.java:373)
at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360)
at
org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:191)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
{code}
was:
Steps to reproduce:
# Create table using attachment *table1.sql* and *table2.sql* ;
# Query with attachment *groupbyquery.sql* with conf set
hive.groupby.skewindata=true; and set hive.map.aggr=true;
# Error logs:
{code:java}
attempt_1573963821463_867118_1_03_000000_0:java.lang.RuntimeException:
java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1685)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
at
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
at
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats$GenericUDAFNumericStatsEvaluator.init(GenericUDAFComputeStats.java:369)
at
org.apache.hadoop.hive.ql.exec.GroupByOperator.initializeOp(GroupByOperator.java:373)
at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
at
org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:211)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
... 16 more{code}
By the way, the query succeeds with conf set
hive.groupby.skewindata=true; and *set hive.map.aggr=false*;
> GROUP BY query with SET hive.groupby.skewindata=true causes
> "java.lang.NullPointerException"
> ---------------------------------------------------------------------------------------------
>
> Key: HIVE-22633
> URL: https://issues.apache.org/jira/browse/HIVE-22633
> Project: Hive
> Issue Type: Bug
> Affects Versions: 3.1.0, 4.0.0, 3.1.1
> Reporter: zhangbutao
> Priority: Major
>
> If hive.map.aggr and hive.groupby.skewindata are both set to true, an exception
> will be thrown.
> Steps to reproduce:
> 1. Set the configuration, create the tables, and run the insert:
> set hive.map.aggr=true;
> set hive.groupby.skewindata=true;
> create table test1 (id1 bigint);
> create table test2 (id2 bigint) partitioned by(dt2 string);
> insert into test2 partition(dt2='2020') select a.id1 from test1 a group by
> a.id1;
> 2. The task fails with a NullPointerException:
> {code:java}
> ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) :
> attempt_1585641455670_0001_2_03_000000_2:java.lang.RuntimeException:
> java.lang.NullPointerException
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
> at
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
> at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
> at
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.NullPointerException
> at
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats$GenericUDAFNumericStatsEvaluator.init(GenericUDAFComputeStats.java:373)
> at
> org.apache.hadoop.hive.ql.exec.GroupByOperator.initializeOp(GroupByOperator.java:373)
> at
> org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:360)
> at
> org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:191)
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
> {code}
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)