[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

FrankieLee updated HIVE-25671:
------------------------------
    Description: 
{format}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
        at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
        at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
        at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
        at 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
        at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
{format}

  was:

{format}
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Deserializing spilled hash partition...
2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Number of rows in hashmap: 1
2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid Grace 
Hash Join: Going to process spilled big table rows in partition 5. Number of 
rows: 1
2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
exception from MapJoinOperator : null
java.lang.NullPointerException
        at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
        at 
org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
        at 
org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
        at 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
        at 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
        at 
org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
        at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
{format}


It seems that this is a bug in how the Hive hash join handles RCFile: writing
the map from memory to disk works, and the problem occurs when the map is read
back from disk. RCFile is not a popular format for Hive; if you do not use this
file format, or do not use the feature that flushes the information from memory
to disk, this problem may be avoided, but at the same time an OOM may occur
instead.

> Hybrid Grace Hash Join NullPointer When query RCFile
> ----------------------------------------------------
>
>                 Key: HIVE-25671
>                 URL: https://issues.apache.org/jira/browse/HIVE-25671
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.2
>            Reporter: Nemon Lou
>            Priority: Major
>
> {format}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>       at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>       at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at 
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
>       at 
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
>       at 
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> {format}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to