[
https://issues.apache.org/jira/browse/TEZ-2267?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jeff Zhang updated TEZ-2267:
----------------------------
Description:
{code}
"TaskSchedulerAppCaller #0" daemon prio=10 tid=0x00007f044005e800 nid=0x7be6
waiting on condition [0x00007f04350ce000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000fc279e18> (a
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282)
at
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731)
at org.apache.tez.dag.app.dag.impl.TaskImpl.isFinished(TaskImpl.java:394)
at
org.apache.tez.dag.app.dag.impl.VertexImpl.computeProgress(VertexImpl.java:1064)
at
org.apache.tez.dag.app.dag.impl.VertexImpl.getProgress(VertexImpl.java:1002)
at org.apache.tez.dag.app.dag.impl.DAGImpl.getProgress(DAGImpl.java:676)
at org.apache.tez.dag.app.DAGAppMaster.getProgress(DAGAppMaster.java:1067)
at
org.apache.tez.dag.app.rm.TaskSchedulerEventHandler.getProgress(TaskSchedulerEventHandler.java:558)
at
org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:291)
at
org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:1)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Locked ownable synchronizers:
- <0x00000000fc20aed8> (a java.util.concurrent.ThreadPoolExecutor$Worker)
"IPC Server handler 0 on 47949" daemon prio=10 tid=0x00007f0448036800
nid=0x7bc0 waiting on condition [0x00007f04372f0000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000fc200160> (a
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2090)
at org.apache.tez.dag.app.dag.impl.DAGImpl.getDAGStatus(DAGImpl.java:763)
at
org.apache.tez.dag.api.client.DAGClientHandler.getDAGStatus(DAGClientHandler.java:67)
at
org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPBServerImpl.getDAGStatus(DAGClientAMProtocolBlockingPBServerImpl.java:99)
at
org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC$DAGClientAMProtocol$2.callBlockingMethod(DAGClientAMProtocolRPC.java:7465)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:619)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:962)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2039)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2035)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)
Locked ownable synchronizers:
- None
{code}
> Deadlock caused by TEZ-2149
> ---------------------------
>
> Key: TEZ-2267
> URL: https://issues.apache.org/jira/browse/TEZ-2267
> Project: Apache Tez
> Issue Type: Bug
> Reporter: Jeff Zhang
> Priority: Critical
>
> {code}
> "TaskSchedulerAppCaller #0" daemon prio=10 tid=0x00007f044005e800 nid=0x7be6
> waiting on condition [0x00007f04350ce000]
> java.lang.Thread.State: WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
> - parking to wait for <0x00000000fc279e18> (a
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
> at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282)
> at
> java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731)
> at org.apache.tez.dag.app.dag.impl.TaskImpl.isFinished(TaskImpl.java:394)
> at
> org.apache.tez.dag.app.dag.impl.VertexImpl.computeProgress(VertexImpl.java:1064)
> at
> org.apache.tez.dag.app.dag.impl.VertexImpl.getProgress(VertexImpl.java:1002)
> at org.apache.tez.dag.app.dag.impl.DAGImpl.getProgress(DAGImpl.java:676)
> at org.apache.tez.dag.app.DAGAppMaster.getProgress(DAGAppMaster.java:1067)
> at
> org.apache.tez.dag.app.rm.TaskSchedulerEventHandler.getProgress(TaskSchedulerEventHandler.java:558)
> at
> org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:291)
> at
> org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:1)
> at java.util.concurrent.FutureTask.run(FutureTask.java:262)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> at java.lang.Thread.run(Thread.java:745)
> Locked ownable synchronizers:
> - <0x00000000fc20aed8> (a java.util.concurrent.ThreadPoolExecutor$Worker)
> "IPC Server handler 0 on 47949" daemon prio=10 tid=0x00007f0448036800
> nid=0x7bc0 waiting on condition [0x00007f04372f0000]
> java.lang.Thread.State: WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
> - parking to wait for <0x00000000fc200160> (a
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
> at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2090)
> at org.apache.tez.dag.app.dag.impl.DAGImpl.getDAGStatus(DAGImpl.java:763)
> at
> org.apache.tez.dag.api.client.DAGClientHandler.getDAGStatus(DAGClientHandler.java:67)
> at
> org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPBServerImpl.getDAGStatus(DAGClientAMProtocolBlockingPBServerImpl.java:99)
> at
> org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC$DAGClientAMProtocol$2.callBlockingMethod(DAGClientAMProtocolRPC.java:7465)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:619)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:962)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2039)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2035)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)
> Locked ownable synchronizers:
> - None
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)