[
https://issues.apache.org/jira/browse/HIVE-13730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15279098#comment-15279098
]
Wei Zheng commented on HIVE-13730:
----------------------------------
The test is stuck in an infinite while loop in
BytesBytesMultiHashMap.findKeySlotToWrite(); see the "TezChild" thread
in the jstack output below.
{code}
$ jps
90673 TezChild
90976 TezChild
90855 TezChild
91225 Jps
82923 RemoteMavenServer
90205 surefirebooter3625226115924096543.jar
90191 Launcher
90542 DAGAppMaster
$ jstack 90673
2016-05-10 15:13:47
Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.74-b02 mixed mode):
"Attach Listener" #138 daemon prio=9 os_prio=31 tid=0x00007feea4800000
nid=0x3d3b waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"TezTaskEventRouter{attempt_1462916018098_0001_32_01_000000_0}" #134 daemon
prio=5 os_prio=31 tid=0x00007feea684f000 nid=0x692f waiting on condition
[0x0000700001be7000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000007bc9d6490> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask$1.runInternal(LogicalIOProcessorRuntimeTask.java:773)
at org.apache.tez.common.RunnableWithNdc.run(RunnableWithNdc.java:35)
at java.lang.Thread.run(Thread.java:745)
"org.apache.hadoop.hdfs.PeerCache@35f41fc9" #22 daemon prio=5 os_prio=31
tid=0x00007feea686d800 nid=0x6a03 waiting on condition [0x0000700001cea000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.hadoop.hdfs.PeerCache.run(PeerCache.java:244)
at org.apache.hadoop.hdfs.PeerCache.access$000(PeerCache.java:41)
at org.apache.hadoop.hdfs.PeerCache$1.run(PeerCache.java:119)
at java.lang.Thread.run(Thread.java:745)
"TaskHeartbeatThread" #15 daemon prio=5 os_prio=31 tid=0x00007feea310c000
nid=0x6403 waiting on condition [0x00007000019e1000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000007bcb6aa40> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2163)
at
org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:200)
at
org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:128)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"IPC Parameter Sending Thread #0" #14 daemon prio=5 os_prio=31
tid=0x00007feea0979000 nid=0x6203 waiting on condition [0x00007000018de000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x000000078df78428> (a
java.util.concurrent.SynchronousQueue$TransferStack)
at
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460)
at
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362)
at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941)
at
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1066)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"IPC Client (1617838096) connection to /10.22.27.129:64289 from
application_1462916018098_0001" #13 daemon prio=5 os_prio=31
tid=0x00007feea11f6800 nid=0x6003 in Object.wait() [0x00007000017db000]
java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:920)
- locked <0x000000078df52318> (a
org.apache.hadoop.ipc.Client$Connection)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:965)
"TezChild" #12 daemon prio=5 os_prio=31 tid=0x00007feea0a65000 nid=0x5e07
runnable [0x00007000016d7000]
java.lang.Thread.State: RUNNABLE
at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.findKeySlotToWrite(BytesBytesMultiHashMap.java:602)
at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.put(BytesBytesMultiHashMap.java:454)
at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:646)
at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:591)
at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:528)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:641)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:655)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:413)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:186)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:160)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:355)
at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:72)
at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:60)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:60)
at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:36)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"AsyncLogger-1" #11 daemon prio=5 os_prio=31 tid=0x00007feea1235000 nid=0x5a0f
waiting on condition [0x00007000015d5000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x000000078e0657c8> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at
com.lmax.disruptor.BlockingWaitStrategy.waitFor(BlockingWaitStrategy.java:45)
at
com.lmax.disruptor.ProcessingSequenceBarrier.waitFor(ProcessingSequenceBarrier.java:55)
at
com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:123)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"Service Thread" #9 daemon prio=9 os_prio=31 tid=0x00007feea4801000 nid=0x5203
runnable [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C1 CompilerThread3" #8 daemon prio=9 os_prio=31 tid=0x00007feea3004800
nid=0x5003 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread2" #7 daemon prio=9 os_prio=31 tid=0x00007feea102c800
nid=0x4e03 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread1" #6 daemon prio=9 os_prio=31 tid=0x00007feea1803800
nid=0x4c03 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread0" #5 daemon prio=9 os_prio=31 tid=0x00007feea1801000
nid=0x4a03 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Signal Dispatcher" #4 daemon prio=9 os_prio=31 tid=0x00007feea081c800
nid=0x3e0f runnable [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Finalizer" #3 daemon prio=8 os_prio=31 tid=0x00007feea080f800 nid=0x3803 in
Object.wait() [0x0000700000d3a000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
- locked <0x000000078e1a8a90> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209)
"Reference Handler" #2 daemon prio=10 os_prio=31 tid=0x00007feea3845000
nid=0x3603 in Object.wait() [0x0000700000c37000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at java.lang.Object.wait(Object.java:502)
at java.lang.ref.Reference.tryHandlePending(Reference.java:191)
- locked <0x000000078e1a8b28> (a java.lang.ref.Reference$Lock)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:153)
"main" #1 prio=5 os_prio=31 tid=0x00007feea2802000 nid=0x1703 waiting on
condition [0x0000700000219000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000007bcb6b0d8> (a
com.google.common.util.concurrent.ListenableFutureTask)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:429)
at java.util.concurrent.FutureTask.get(FutureTask.java:191)
at
org.apache.tez.runtime.task.TezTaskRunner2.run(TezTaskRunner2.java:158)
at org.apache.tez.runtime.task.TezChild.run(TezChild.java:264)
at org.apache.tez.runtime.task.TezChild.main(TezChild.java:508)
"VM Thread" os_prio=31 tid=0x00007feea102c000 nid=0x3403 runnable
"GC task thread#0 (ParallelGC)" os_prio=31 tid=0x00007feea101d000 nid=0x2403
runnable
"GC task thread#1 (ParallelGC)" os_prio=31 tid=0x00007feea080a800 nid=0x2603
runnable
"GC task thread#2 (ParallelGC)" os_prio=31 tid=0x00007feea3000000 nid=0x2803
runnable
"GC task thread#3 (ParallelGC)" os_prio=31 tid=0x00007feea0804000 nid=0x2a03
runnable
"GC task thread#4 (ParallelGC)" os_prio=31 tid=0x00007feea080d000 nid=0x2c03
runnable
"GC task thread#5 (ParallelGC)" os_prio=31 tid=0x00007feea080d800 nid=0x2e03
runnable
"GC task thread#6 (ParallelGC)" os_prio=31 tid=0x00007feea080e800 nid=0x3003
runnable
"GC task thread#7 (ParallelGC)" os_prio=31 tid=0x00007feea080f000 nid=0x3203
runnable
"VM Periodic Task Thread" os_prio=31 tid=0x00007feea481c800 nid=0x5403 waiting
on condition
JNI global references: 273
{code}
> hybridgrace_hashjoin_1.q test gets stuck
> ----------------------------------------
>
> Key: HIVE-13730
> URL: https://issues.apache.org/jira/browse/HIVE-13730
> Project: Hive
> Issue Type: Bug
> Components: Tez
> Affects Versions: 2.1.0
> Reporter: Vikram Dixit K
> Assignee: Wei Zheng
> Priority: Blocker
>
> I am seeing hybridgrace_hashjoin_1.q getting stuck on master.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)