[
https://issues.apache.org/jira/browse/YARN-2601?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14148155#comment-14148155
]
Aroop Maliakkal commented on YARN-2601:
---------------------------------------
As a workaround, we deleted the entries under /rmstore/ZKRMStateRoot/RMAppRoot and
restarted the RMs. That appears to have fixed the issue.
> RMs(HA RMS) can't enter active state
> ------------------------------------
>
> Key: YARN-2601
> URL: https://issues.apache.org/jira/browse/YARN-2601
> Project: Hadoop YARN
> Issue Type: Bug
> Reporter: Cindy Li
>
> 2014-09-24 15:04:04,527 DEBUG
> org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl: Processing
> event for application_1409048687352_0552 of type APP_REJECTED
> 2014-09-24 15:04:04,528 INFO
> org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl:
> application_1409048687352_0552 State change from NEW to FAILED
> 2014-09-24 15:04:04,528 DEBUG org.apache.hadoop.yarn.event.AsyncDispatcher:
> Dispatching the event
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent.EventType:
> APP_REMOVED
> 2014-09-24 15:04:04,528 DEBUG org.apache.hadoop.yarn.event.AsyncDispatcher:
> Dispatching the event
> org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent.EventType:
> APP_COMPLETED
> 2014-09-24 15:04:04,528 DEBUG
> org.apache.hadoop.yarn.server.resourcemanager.RMAppManager: RMAppManager
> processing event for application_1409048687352_0552 of type APP_COMPLETED
> 2014-09-24 15:04:04,528 WARN
> org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger: USER=b_hiveperf0
> OPERATION=Application Finished - Failed TARGET=RMAppManager
> RESULT=FAILURE DESCRIPTION=App failed with state: FAILED
> PERMISSIONS=hadoop tried to renew an expired token
> at
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.renewToken(AbstractDelegationTokenSecretManager.java:366)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.renewDelegationToken(FSNamesystem.java:6279)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.renewDelegationToken(NameNodeRpcServer.java:488)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.renewDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:923)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2020)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2016)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2014)
> APPID=application_1409048687352_0552
> 2014-09-24 15:04:04,529 DEBUG org.apache.hadoop.service.AbstractService:
> Service: RMActiveServices entered state STOPPED
> ....
> 2014-09-24 15:04:04,538 WARN
> org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger: USER=hadoop
> OPERATION=transitionToActive TARGET=RMHAProtocolService
> RESULT=FAILURE DESCRIPTION=Exception transitioning to active
> PERMISSIONS=Users [hadoop] are allowed
> 2014-09-24 15:04:04,539 WARN org.apache.hadoop.ha.ActiveStandbyElector:
> Exception handling the winning of election
> org.apache.hadoop.ha.ServiceFailedException: RM could not transition to Active
> at
> org.apache.hadoop.yarn.server.resourcemanager.EmbeddedElectorService.becomeActive(EmbeddedElectorService.java:118)
> at
> org.apache.hadoop.ha.ActiveStandbyElector.becomeActive(ActiveStandbyElector.java:804)
> at
> org.apache.hadoop.ha.ActiveStandbyElector.processResult(ActiveStandbyElector.java:415)
> at
> org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:596)
> at
> org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:495)
> Caused by: org.apache.hadoop.ha.ServiceFailedException: Error when
> transitioning to Active mode
> at
> org.apache.hadoop.yarn.server.resourcemanager.AdminService.transitionToActive(AdminService.java:292)
> at
> org.apache.hadoop.yarn.server.resourcemanager.EmbeddedElectorService.becomeActive(EmbeddedElectorService.java:116)
> ... 4 more
> Caused by: org.apache.hadoop.service.ServiceStateException:
> org.apache.hadoop.security.token.SecretManager$InvalidToken: hadoop tried to
> renew an expired token
> at
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.renewToken(AbstractDelegationTokenSecretManager.java:366)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.renewDelegationToken(FSNamesystem.java:6279)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.renewDelegationToken(NameNodeRpcServer.java:488)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.renewDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:923)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2020)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2016)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2014)
> at
> org.apache.hadoop.service.ServiceStateException.convert(ServiceStateException.java:59)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:204)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.startActiveServices(ResourceManager.java:832)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$1.run(ResourceManager.java:872)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$1.run(ResourceManager.java:869)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.transitionToActive(ResourceManager.java:869)
> at
> org.apache.hadoop.yarn.server.resourcemanager.AdminService.transitionToActive(AdminService.java:283)
> ... 5 more
> Caused by: org.apache.hadoop.security.token.SecretManager$InvalidToken:
> hadoop tried to renew an expired token
> at
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.renewToken(AbstractDelegationTokenSecretManager.java:366)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.renewDelegationToken(FSNamesystem.java:6279)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.renewDelegationToken(NameNodeRpcServer.java:488)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.renewDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:923)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2020)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2016)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2014)
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> at
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> at
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
> at
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
> at org.apache.hadoop.hdfs.DFSClient$Renewer.renew(DFSClient.java:1054)
> at org.apache.hadoop.security.token.Token.renew(Token.java:377)
> at
> org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer$1.run(DelegationTokenRenewer.java:473)
> at
> org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer$1.run(DelegationTokenRenewer.java:470)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at
> org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer.renewToken(DelegationTokenRenewer.java:469)
> at
> org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer.handleAppSubmitEvent(DelegationTokenRenewer.java:391)
> at
> org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer.addApplicationSync(DelegationTokenRenewer.java:353)
> at
> org.apache.hadoop.yarn.server.resourcemanager.RMAppManager.recoverApplication(RMAppManager.java:326)
> at
> org.apache.hadoop.yarn.server.resourcemanager.RMAppManager.recover(RMAppManager.java:425)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.recover(ResourceManager.java:1025)
> at
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$RMActiveServices.serviceStart(ResourceManager.java:484)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> ... 13 more
> Caused by:
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.token.SecretManager$InvalidToken):
> hadoop tried to renew an expired token
> at
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.renewToken(AbstractDelegationTokenSecretManager.java:366)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.renewDelegationToken(FSNamesystem.java:6279)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.renewDelegationToken(NameNodeRpcServer.java:488)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.renewDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:923)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2020)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2016)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1650)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2014)
> at org.apache.hadoop.ipc.Client.call(Client.java:1410)
> at org.apache.hadoop.ipc.Client.call(Client.java:1363)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
> at com.sun.proxy.$Proxy77.renewDelegationToken(Unknown Source)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.renewDelegationToken(ClientNamenodeProtocolTranslatorPB.java:869)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:606)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:190)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:103)
> at com.sun.proxy.$Proxy78.renewDelegationToken(Unknown Source)
> at org.apache.hadoop.hdfs.DFSClient$Renewer.renew(DFSClient.java:1052)
> ... 27 more
> 2014-09-24 15:04:04,540 INFO org.apache.hadoop.ha.ActiveStandbyElector:
> Trying to re-establish ZK session
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)