[
https://issues.apache.org/jira/browse/YARN-11618?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jepson updated YARN-11618:
--------------------------
Description:
2023-11-18 04:34:22,767 INFO
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore:
RMStateStore state change from ACTIVE to FENCED
2023-11-18 04:34:22,768*{color:#DE350B} ERROR
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received
RMFatalEvent of type STATE_STORE_FENCED, caused by
org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode =
NodeExists{color}*
at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
at
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
at
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
at
org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
at
org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
at
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
at
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
at
org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
at
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
at
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
at
org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
at java.lang.Thread.run(Thread.java:748)
2023-11-18 04:34:22,768 WARN
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager:
*{color:#FFAB00}Transitioning the resource manager to standby.{color}*
2023-11-18 04:34:22,768 INFO
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM
to Standby mode
2023-11-18 04:34:22,768 INFO
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to
standby state
2023-11-18 04:34:22,768 WARN
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
interrupted. Returning.
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler
38 on 23140, call Call#186992428 Retry#0
org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport
from 10.16.7.13:26779
org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application
with id 'application_1700065178014_0664' doesn't exist in RM. Please check that
the job submission was successful.
at
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
at
org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
at
org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on
23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on
23130
2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 23130
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on
8031
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 8031
2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
was:
2023-11-18 04:34:22,767 INFO
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore:
RMStateStore state change from ACTIVE to FENCED
2023-11-18 04:34:22,768*{color:#DE350B} ERROR
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received
RMFatalEvent of type STATE_STORE_FENCED, caused by
org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode =
NodeExists{color}*
at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
at
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
at
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
at
org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
at
org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
at
org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
at
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
at
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
at
org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
at
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
at
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
at
org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
at java.lang.Thread.run(Thread.java:748)
2023-11-18 04:34:22,768 WARN
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager:
{color:#DE350B}*{color:#4C9AFF}Transitioning the resource manager to
standby.{color}*{color}
2023-11-18 04:34:22,768 INFO
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM
to Standby mode
2023-11-18 04:34:22,768 INFO
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to
standby state
2023-11-18 04:34:22,768 WARN
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
interrupted. Returning.
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler
38 on 23140, call Call#186992428 Retry#0
org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport
from 10.16.7.13:26779
org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application
with id 'application_1700065178014_0664' doesn't exist in RM. Please check that
the job submission was successful.
at
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
at
org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
at
org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on
23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on
23130
2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 23130
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on
8031
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
listener on 8031
2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server
Responder
> Received RMFatalEvent of type STATE_STORE_FENCED
> ------------------------------------------------
>
> Key: YARN-11618
> URL: https://issues.apache.org/jira/browse/YARN-11618
> Project: Hadoop YARN
> Issue Type: Bug
> Components: resourcemanager
> Affects Versions: 2.9.2
> Reporter: Jepson
> Priority: Major
>
> 2023-11-18 04:34:22,767 INFO
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore:
> RMStateStore state change from ACTIVE to FENCED
> 2023-11-18 04:34:22,768*{color:#DE350B} ERROR
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received
> RMFatalEvent of type STATE_STORE_FENCED, caused by
> org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode =
> NodeExists{color}*
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
> at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
> at
> org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
> at
> org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
> at
> org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
> at
> org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
> at
> org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
> at
> org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
> at
> org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
> at
> org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
> at
> org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
> at
> org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
> at
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
> at
> org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
> at
> org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
> at java.lang.Thread.run(Thread.java:748)
> 2023-11-18 04:34:22,768 WARN
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager:
> *{color:#FFAB00}Transitioning the resource manager to standby.{color}*
> 2023-11-18 04:34:22,768 INFO
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning
> RM to Standby mode
> 2023-11-18 04:34:22,768 INFO
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning
> to standby state
> 2023-11-18 04:34:22,768 WARN
> org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
> org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
> interrupted. Returning.
> 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler
> 38 on 23140, call Call#186992428 Retry#0
> org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport
> from 10.16.7.13:26779
> org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application
> with id 'application_1700065178014_0664' doesn't exist in RM. Please check
> that the job submission was successful.
> at
> org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
> at
> org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
> at
> org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
> 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on
> 23140
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server listener on 23140
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server Responder
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on
> 23130
> 2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server listener on 23130
> 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on
> 8031
> 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server Responder
> 2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server listener on 8031
> 2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC
> Server Responder
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: yarn-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: yarn-issues-help@hadoop.apache.org