[ 
https://issues.apache.org/jira/browse/HIVE-14363?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15400802#comment-15400802
 ] 

Ashutosh Chauhan commented on HIVE-14363:
-----------------------------------------

[~hsubramaniyan] Please commit if failures are not related.

> bucketmap inner join query fails due to NullPointerException in some cases
> --------------------------------------------------------------------------
>
>                 Key: HIVE-14363
>                 URL: https://issues.apache.org/jira/browse/HIVE-14363
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 2.2.0
>            Reporter: Jagruti Varia
>            Assignee: Hari Sankar Sivarama Subramaniyan
>         Attachments: HIVE-14363.1.patch
>
>
> Bucketmap inner join query between bucketed tables throws following exception 
> when one table contains all the empty buckets while other has all the 
> non-empty buckets.
> {noformat}
> Vertex failed, vertexName=Map 2, vertexId=vertex_1466710232033_0432_4_01, 
> diagnostics=[Task failed, taskId=task_1466710232033_0432_4_01_000000, 
> diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( 
> failure ) : 
> attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 
> killedTasks:59, Vertex vertex_1466710232033_0432_4_01 [Map 2] killed/failed 
> due to:OWN_TASK_FAILURE]
> DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0
> FAILED: Execution Error, return code 2 from 
> org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 2, 
> vertexId=vertex_1466710232033_0432_4_01, diagnostics=[Task failed, 
> taskId=task_1466710232033_0432_4_01_000000, diagnostics=[TaskAttempt 0 
> failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : 
> attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: 
> java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Map operator initialization failed
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184)
>       ... 14 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292)
>       ... 15 more
> {noformat}
> Steps to reproduce the issue:
> {noformat}
> set hive.execution.engine=tez;
> set hive.exeucution.mode=container;
> drop table if exists src_emptybucket;
> create table src_emptybucket (name string, age int, gpa double)
> clustered by (age)
> into 30 buckets
> stored as orc;
> insert into table src_emptybucket
> select * from studenttab10k limit 0;
> drop table if exists src_nonemptybucket;
> create table src_nonemptybucket (name varchar(50), age int, gpa 
> decimal(38,18))
> clustered by (age)
> into 60 buckets
> stored as orc;
> insert into table src_nonemptybucket
> select * from studenttab10k 
> where age in (
> select distinct(age) from studenttab10k 
> limit 60);
> set hive.optimize.bucketmapjoin=true;
> set hive.convert.join.bucket.mapjoin.tez=true;
> select e1.name as e1_name, e1.age as e1_age, e1.gpa as e1_gpa, e2.name as 
> e2_name, e2.age as e2_age, e2.gpa as e2_gpa from src_emptybucket e1 inner 
> join src_nonemptybucket e2 on e1.age = e2.age;
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to