[ 
https://issues.apache.org/jira/browse/YARN-4424?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15044141#comment-15044141
 ] 

Jian He commented on YARN-4424:
-------------------------------

This is a similar problem to YARN-2594.
Thread 1 
{code}
Thread 53785: (state = BLOCKED)
 - sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may 
be imprecise)
 - java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14, 
line=186 (Interpreted frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt() 
@bci=1, line=834 (Interpreted frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(int) 
@bci=83, line=964 (Interpreted frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(int) 
@bci=10, line=1282 (Interpreted frame)
 - java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock() @bci=5, 
line=731 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl.getFinalApplicationStatus()
 @bci=4, line=478 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher.appAttemptFinished(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt,
 org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState, 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp, long) @bci=45, 
line=162 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl$BaseFinalTransition.transition(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl,
 org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent) 
@bci=288, line=1300 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl$FinalTransition.transition(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl,
 org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent) 
@bci=9, line=1493 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl$FinalTransition.transition(java.lang.Object,
 java.lang.Object) @bci=9, line=1480 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl$FinalStateSavedTransition.transition(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl,
 org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent) 
@bci=24, line=1213 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl$FinalStateSavedTransition.transition(java.lang.Object,
 java.lang.Object) @bci=9, line=1205 (Interpreted frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(java.lang.Object,
 java.lang.Enum, java.lang.Object, java.lang.Enum) @bci=6, line=385 
(Interpreted frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(java.lang.Object, 
java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=45, line=302 
(Interpreted frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory.access$300(org.apache.hadoop.yarn.state.StateMachineFactory,
 java.lang.Object, java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=6, 
line=46 (Interpreted frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(java.lang.Enum,
 java.lang.Object) @bci=15, line=448 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl.handle(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent)
 @bci=65, line=784 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl.handle(org.apache.hadoop.yarn.event.Event)
 @bci=5, line=106 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$ApplicationAttemptEventDispatcher.handle(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent)
 @bci=53, line=815 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$ApplicationAttemptEventDispatcher.handle(org.apache.hadoop.yarn.event.Event)
 @bci=5, line=796 (Interpreted frame)
 - 
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(org.apache.hadoop.yarn.event.Event)
 @bci=88, line=183 (Interpreted frame)
 - org.apache.hadoop.yarn.event.AsyncDispatcher$1.run() @bci=140, line=109 
(Interpreted frame)
{code}
Thread 2
{code}
Thread 25723: (state = BLOCKED)
 - sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may 
be imprecise)
 - java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14, 
line=186 (Interpreted frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt() 
@bci=1, line=834 (Interpreted frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(int) 
@bci=83, line=964 (Interpreted frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(int) 
@bci=10, line=1282 (Compiled frame)
 - java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock() @bci=5, 
line=731 (Compiled frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl.createAndGetApplicationReport(java.lang.String,
 boolean) @bci=4, line=598 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplications(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest,
 boolean) @bci=610, line=814 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplications(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest)
 @bci=3, line=681 (Interpreted frame)
 - org.apache.hadoop.yarn.server.webapp.AppsBlock$1.run() @bci=11, line=89 
(Interpreted frame)
 - org.apache.hadoop.yarn.server.webapp.AppsBlock$1.run() @bci=1, line=86 
(Interpreted frame)
 - 
java.security.AccessController.doPrivileged(java.security.PrivilegedExceptionAction,
 java.security.AccessControlContext) @bci=0 (Compiled frame)
 - javax.security.auth.Subject.doAs(javax.security.auth.Subject, 
java.security.PrivilegedExceptionAction) @bci=42, line=415 (Compiled frame)
 - 
org.apache.hadoop.security.UserGroupInformation.doAs(java.security.PrivilegedExceptionAction)
 @bci=14, line=1657 (Compiled frame)
 - org.apache.hadoop.yarn.server.webapp.AppsBlock.fetchData() @bci=166, line=84 
(Interpreted frame)
 - 
org.apache.hadoop.yarn.server.webapp.AppsBlock.render(org.apache.hadoop.yarn.webapp.view.HtmlBlock$Block)
 @bci=7, line=101 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlBlock.render() @bci=31, line=69 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlBlock.renderPartial() @bci=1, line=79 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.View.render(java.lang.Class) @bci=16, line=235 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlBlock$Block.subView(java.lang.Class) 
@bci=23, line=43 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.hamlet.Hamlet._(java.lang.Class) @bci=2, 
line=30347 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.webapp.AppsBlockWithMetrics.render(org.apache.hadoop.yarn.webapp.view.HtmlBlock$Block)
 @bci=12, line=30 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlBlock.render() @bci=31, line=69 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlBlock.renderPartial() @bci=1, line=79 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.View.render(java.lang.Class) @bci=16, line=235 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlPage$Page.subView(java.lang.Class) 
@bci=23, line=49 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.hamlet.HamletImpl$EImp._v(java.lang.Class) 
@bci=9, line=117 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.hamlet.Hamlet$TD._(java.lang.Class) @bci=2, 
line=845 (Interpreted frame)
 - 
org.apache.hadoop.yarn.webapp.view.TwoColumnLayout.render(org.apache.hadoop.yarn.webapp.hamlet.Hamlet$HTML)
 @bci=211, line=56 (Interpreted frame)
 - org.apache.hadoop.yarn.webapp.view.HtmlPage.render() @bci=35, line=82 
(Interpreted frame)
 - org.apache.hadoop.yarn.webapp.Dispatcher.render(java.lang.Class) @bci=13, 
line=197 (Interpreted frame)
 - 
org.apache.hadoop.yarn.webapp.Dispatcher.service(javax.servlet.http.HttpServletRequest,
 javax.servlet.http.HttpServletResponse) @bci=452, line=156 (Interpreted frame)
 - javax.servlet.http.HttpServlet.service(javax.servlet.ServletRequest, 
javax.servlet.ServletResponse) @bci=30, line=820 (Interpreted frame)
 - 
com.google.inject.servlet.ServletDefinition.doService(javax.servlet.ServletRequest,
 javax.servlet.ServletResponse) @bci=26, line=263 (Interpreted frame)
{code}

Thread 3
{code}
Thread 53696: (state = BLOCKED)
 - sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may 
be imprecise)
 - java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14, 
line=186 (Interpreted frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt() 
@bci=1, line=834 (Interpreted frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(java.util.concurrent.locks.AbstractQueuedSynchronizer$Node,
 int) @bci=67, line=867 (Interpreted frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(int) @bci=17, 
line=1197 (Interpreted frame)
 - java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock() @bci=5, 
line=945 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl.pullRMNodeUpdates(java.util.Collection)
 @bci=4, line=584 (Interpreted frame)
 - 
org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService.allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)
 @bci=825, line=551 (Interpreted frame)
 - 
org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl.allocate(com.google.protobuf.RpcController,
 org.apache.hadoop.yarn.proto.YarnServiceProtos$AllocateRequestProto) @bci=14, 
line=60 (Interpreted frame)
 - 
org.apache.hadoop.yarn.proto.ApplicationMasterProtocol$ApplicationMasterProtocolService$2.callBlockingMethod(com.google.protobuf.Descriptors$MethodDescriptor,
 com.google.protobuf.RpcController, com.google.protobuf.Message) @bci=91, 
line=99 (Interpreted frame)
 - 
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(org.apache.hadoop.ipc.RPC$Server,
 java.lang.String, org.apache.hadoop.io.Writable, long) @bci=246, line=616 
(Interpreted frame)
 - org.apache.hadoop.ipc.RPC$Server.call(org.apache.hadoop.ipc.RPC$RpcKind, 
java.lang.String, org.apache.hadoop.io.Writable, long) @bci=9, line=969 
(Interpreted frame)
 - org.apache.hadoop.ipc.Server$Handler$1.run() @bci=38, line=2151 (Interpreted 
frame)
 - org.apache.hadoop.ipc.Server$Handler$1.run() @bci=1, line=2147 (Interpreted 
frame)
 - 
java.security.AccessController.doPrivileged(java.security.PrivilegedExceptionAction,
 java.security.AccessControlContext) @bci=0 (Interpreted frame)
 - javax.security.auth.Subject.doAs(javax.security.auth.Subject, 
java.security.PrivilegedExceptionAction) @bci=42, line=415 (Interpreted frame)
 - 
org.apache.hadoop.security.UserGroupInformation.doAs(java.security.PrivilegedExceptionAction)
 @bci=14, line=1657 (Interpreted frame)
 - org.apache.hadoop.ipc.Server$Handler.run() @bci=315, line=2145 (Interpreted 
frame)
{code}

Thread 1: RMAppAttemptImpl (holding RMAppAttemptImpl write lock) -> 
SystemMetricsPublisher#appAttemptFinished -> RMAppImpl (trying to acquire 
RMAppImpl read lock, which cannot be obtained because thread 3 has already 
requested the write lock).
Thread 2: RMAppImpl (holding RMAppImpl read lock) -> 
createAndGetApplicationReport -> RMAppAttemptImpl (trying to acquire 
RMAppAttemptImpl read lock).
Thread 3: ApplicationMasterService#allocate -> RMAppImpl#pullRMNodeUpdates 
(waiting to acquire the RMAppImpl write lock).


> YARN CLI command hangs
> ----------------------
>
>                 Key: YARN-4424
>                 URL: https://issues.apache.org/jira/browse/YARN-4424
>             Project: Hadoop YARN
>          Issue Type: Bug
>            Reporter: Yesha Vora
>            Assignee: Jian He
>            Priority: Blocker
>
> {code}
> yarn@XXX:/mnt/hadoopqe$ /usr/hdp/current/hadoop-yarn-client/bin/yarn 
> application -list -appStates NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING
> 15/12/04 21:59:54 INFO impl.TimelineClientImpl: Timeline service address: 
> http://XXX:8188/ws/v1/timeline/
> 15/12/04 21:59:54 INFO client.RMProxy: Connecting to ResourceManager at 
> XXX/0.0.0.0:8050
> 15/12/04 21:59:55 INFO client.AHSProxy: Connecting to Application History 
> server at XXX/0.0.0.0:10200
> {code}
> {code:title=RM log}
> 2015-12-04 21:59:19,744 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 237000
> 2015-12-04 22:00:50,945 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 238000
> 2015-12-04 22:02:22,416 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 239000
> 2015-12-04 22:03:53,593 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 240000
> 2015-12-04 22:05:24,856 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 241000
> 2015-12-04 22:06:56,235 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 242000
> 2015-12-04 22:08:27,510 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 243000
> 2015-12-04 22:09:58,786 INFO  event.AsyncDispatcher 
> (AsyncDispatcher.java:handle(243)) - Size of event-queue is 244000
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to