[
https://issues.apache.org/jira/browse/YARN-6607?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16405907#comment-16405907
]
gouri shankar commented on YARN-6607:
-------------------------------------
Stack Trace
FATAL event.AsyncDispatcher (AsyncDispatcher.java:dispatch(190)) - Error in
dispatcher thread
java.util.concurrent.RejectedExecutionException: Task
java.util.concurrent.FutureTask@75755d80 rejected from
java.util.concurrent.ThreadPoolExecutor@5a46169[Terminated, pool size = 0,
active threads = 0, queued tasks = 0, completed tasks = 80564]
at
java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2047)
at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:823)
at
java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1369)
at
java.util.concurrent.AbstractExecutorService.submit(AbstractExecutorService.java:134)
at
org.apache.hadoop.registry.server.services.RegistryAdminService.submit(RegistryAdminService.java:176)
at
org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.purgeRecordsAsync(RMRegistryOperationsService.java:200)
at
org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.purgeRecordsAsync(RMRegistryOperationsService.java:170)
at
org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.onContainerFinished(RMRegistryOperationsService.java:146)
at
org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService.handleAppAttemptEvent(RMRegistryService.java:156)
at
org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService$AppEventHandler.handle(RMRegistryService.java:188)
at
org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService$AppEventHandler.handle(RMRegistryService.java:182)
at
org.apache.hadoop.yarn.event.AsyncDispatcher$MultiListenerHandler.handle(AsyncDispatcher.java:279)
at
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184)
at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110)
at java.lang.Thread.run(Thread.java:745)
2018-03-20 06:27:52,787 INFO resourcemanager.RMAuditLogger
(RMAuditLogger.java:logSuccess(141)) - USER=svc-tap-prod OPERATION=AM Released
Container TARGET=SchedulerApp RESULT=SUCCESS
APPID=application_1521520492173_0200
CONTAINERID=container_e335_1521520492173_0200_01_001733
2018-03-20 06:27:52,851 INFO scheduler.SchedulerNode
(SchedulerNode.java:releaseContainer(220)) - Released container
container_e335_1521520492173_0200_01_001733 of capacity <memory:23450,
vCores:1> on host pphdpworkr119xx.global.tesco.org:45454, which currently has
17 containers, <memory:332990, vCores:24> used and <memory:167010, vCores:14>
available, release resources=true
2018-03-20 06:27:52,851 INFO zookeeper.ClientCnxn
(ClientCnxn.java:primeConnection(864)) - Socket connection established,
initiating session, client: /172.17.136.13:44794, server:
pphdpmanag002xx.global.tesco.org/172.17.136.12:2181
2018-03-20 06:27:52,819 INFO zookeeper.ClientCnxn
(ClientCnxn.java:primeConnection(864)) - Socket connection established,
initiating session, client: /172.17.136.13:50905, server:
pphdpmanag003xx.global.tesco.org/172.17.136.13:2181
2018-03-20 06:27:52,882 INFO event.AsyncDispatcher
(AsyncDispatcher.java:serviceStop(142)) - AsyncDispatcher is draining to stop,
igonring any new events.
2018-03-20 06:27:52,882 INFO zookeeper.ClientCnxn
(ClientCnxn.java:onConnected(1279)) - Session establishment complete on server
pphdpmanag003xx.global.tesco.org/172.17.136.13:2181, sessionid =
0x2622a2c989a06e8, negotiated timeout = 10000
2018-03-20 06:27:52,882 INFO zookeeper.ClientCnxn
(ClientCnxn.java:onConnected(1279)) - Session establishment complete on server
pphdpmanag002xx.global.tesco.org/172.17.136.12:2181, sessionid =
0x1622cd06bfd05b0, negotiated timeout = 10000
2018-03-20 06:27:52,883 INFO cloud.ConnectionManager
(ConnectionManager.java:process(104)) - Watcher
org.apache.solr.common.cloud.ConnectionManager@3882ddf4
name:ZooKeeperConnection
Watcher:pphdpmanag002xx.global.tesco.org:2181,pphdpmanag003xx.global.tesco.org:2181,pphdpmanag004xx.global.tesco.org:2181/infra-solr
got event WatchedEvent state:SyncConnected type:None path:null path:null
type:None
2018-03-20 06:27:52,945 INFO cloud.ConnectionManager
(ConnectionManager.java:waitForConnected(230)) - Client is connected to
ZooKeeper
2018-03-20 06:27:52,945 INFO util.AbstractLivelinessMonitor
(AbstractLivelinessMonitor.java:run(139)) - AMLivelinessMonitor thread
interrupted
2018-03-20 06:27:52,945 INFO util.AbstractLivelinessMonitor
(AbstractLivelinessMonitor.java:run(139)) - AMLivelinessMonitor thread
interrupted
2018-03-20 06:27:52,946 ERROR delegation.AbstractDelegationTokenSecretManager
(AbstractDelegationTokenSecretManager.java:run(659)) - ExpiredTokenRemover
received java.lang.InterruptedException: sleep interrupted
2018-03-20 06:27:52,946 INFO cloud.ConnectionManager
(ConnectionManager.java:update(149)) - Connection with ZooKeeper reestablished.
2018-03-20 06:27:52,946 INFO util.AbstractLivelinessMonitor
(AbstractLivelinessMonitor.java:run(139)) -
org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer
thread interrupted
2018-03-20 06:27:52,946 INFO cloud.ZkStateReader
(ZkStateReader.java:createClusterStateWatchersAndUpdate(286)) - Updating
cluster state from ZooKeeper...
2018-03-20 06:27:52,981 INFO impl.MetricsSystemImpl
(MetricsSystemImpl.java:stop(211)) - Stopping ResourceManager metrics system...
2018-03-20 06:27:52,982 INFO impl.MetricsSinkAdapter
(MetricsSinkAdapter.java:publishMetricsFromQueue(141)) - timeline thread
interrupted.
2018-03-20 06:27:52,987 INFO cloud.ConnectionManager
(ConnectionManager.java:process(104)) - Watcher
org.apache.solr.common.cloud.ConnectionManager@3882ddf4
name:ZooKeeperConnection
Watcher:pphdpmanag002xx.global.tesco.org:2181,pphdpmanag003xx.global.tesco.org:2181,pphdpmanag004xx.global.tesco.org:2181/infra-solr
got event WatchedEvent state:SaslAuthenticated type:None path:null path:null
type:None
2018-03-20 06:27:52,989 INFO cloud.DefaultConnectionStrategy
(DefaultConnectionStrategy.java:reconnect(59)) - Reconnected to ZooKeeper
2018-03-20 06:27:52,989 INFO cloud.ConnectionManager
(ConnectionManager.java:process(177)) - Connected:true
2018-03-20 06:27:52,990 INFO zookeeper.ClientCnxn (ClientCnxn.java:run(524)) -
EventThread shut down
2018-03-20 06:27:52,990 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(918)) - Watcher event type: None with
state:SyncConnected for path:null for Service
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore in state
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: STARTED
2018-03-20 06:27:52,990 ERROR recovery.RMStateStore
(RMStateStore.java:notifyStoreOperationFailedInternal(992)) - State store
operation failed
org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException:
RMStateStore has been fenced
at
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore$VerifyActiveStatusThread.run(ZKRMStateStore.java:1149)
2018-03-20 06:27:53,023 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(926)) - ZKRMStateStore Session connected
2018-03-20 06:27:53,023 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(911)) - Ignore watcher event type: None
with state:Disconnected for path:null from old session
2018-03-20 06:27:53,024 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(918)) - Watcher event type: None with
state:SaslAuthenticated for path:null for Service
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore in state
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: STARTED
2018-03-20 06:27:53,024 ERROR recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(946)) - Unexpected Zookeeper watch event
state: SaslAuthenticated
2018-03-20 06:27:53,024 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processResult(122)) - ZooKeeper sync operation succeeded.
path: /rmstore/ZKRMStateRoot
2018-03-20 06:27:53,024 INFO recovery.ZKRMStateStore
(ZKRMStateStore.java:processWatchEvent(911)) - Ignore watcher event type: None
with state:Expired for path:null from old session
2018-03-20 06:27:53,024 WARN recovery.RMStateStore
(RMStateStore.java:notifyStoreOperationFailedInternal(994)) - State-store
fenced ! Transitioning RM to standby
2018-03-20 06:27:53,024 INFO zookeeper.ClientCnxn (ClientCnxn.java:run(524)) -
EventThread shut down
2018-03-20 06:27:53,058 INFO recovery.RMStateStore
(RMStateStore.java:run(1067)) - RMStateStore has been fenced
2018-03-20 06:27:53,058 INFO recovery.RMStateStore
(RMStateStore.java:handleStoreEvent(963)) - RMStateStore state change from
ACTIVE to FENCED
2018-03-20 06:27:53,059 INFO resourcemanager.ResourceManager
(ResourceManager.java:handleTransitionToStandBy(777)) - Transitioning RM to
Standby mode
2018-03-20 06:27:53,534 INFO impl.MetricsSystemImpl
(MetricsSystemImpl.java:stop(217)) - ResourceManager metrics system stopped.
2018-03-20 06:27:54,211 INFO impl.MetricsSystemImpl
(MetricsSystemImpl.java:shutdown(606)) - ResourceManager metrics system
shutdown complete.
2018-03-20 06:27:54,211 INFO event.AsyncDispatcher
(AsyncDispatcher.java:serviceStop(142)) - AsyncDispatcher is draining to stop,
igonring any new events.
2018-03-20 06:27:54,211 INFO mortbay.log (Slf4jLog.java:info(67)) - Stopped
[email protected]:8090
2018-03-20 06:27:54,399 INFO zookeeper.ZooKeeper (ZooKeeper.java:close(684)) -
Session: 0x1622cd06bfd05b0 closed
2018-03-20 06:27:54,430 INFO zookeeper.ClientCnxn (ClientCnxn.java:run(524)) -
EventThread shut down
2018-03-20 06:27:54,617 INFO event.AsyncDispatcher
(AsyncDispatcher.java:register(209)) - Registering class
org.apache.hadoop.yarn.server.resourcemanager.RMFatalEventType for class
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$RMFatalEventDispatcher
> YARN Resource Manager quits with the exception
> java.util.concurrent.RejectedExecutionException:
> ------------------------------------------------------------------------------------------------
>
> Key: YARN-6607
> URL: https://issues.apache.org/jira/browse/YARN-6607
> Project: Hadoop YARN
> Issue Type: Bug
> Components: resourcemanager
> Affects Versions: 2.7.1
> Reporter: Anandhaprabhu
> Priority: Major
>
> ResourceManager goes down frequently with the below exception
> 2017-05-16 03:32:36,897 FATAL event.AsyncDispatcher
> (AsyncDispatcher.java:dispatch(189)) - Error in dispatcher thread
> java.util.concurrent.RejectedExecutionException: Task
> java.util.concurrent.FutureTask@9efeac9 rejected from
> java.util.concurrent.ThreadPoolExecutor@42ab30[Shutting down, pool size = 16,
> active threads = 0, queued tasks = 0, completed tasks = 223337]
> at
> java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2047)
> at
> java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:823)
> at
> java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1369)
> at
> java.util.concurrent.AbstractExecutorService.submit(AbstractExecutorService.java:134)
> at
> org.apache.hadoop.registry.server.services.RegistryAdminService.submit(RegistryAdminService.java:176)
> at
> org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.purgeRecordsAsync(RMRegistryOperationsService.java:200)
> at
> org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.purgeRecordsAsync(RMRegistryOperationsService.java:170)
> at
> org.apache.hadoop.registry.server.integration.RMRegistryOperationsService.onContainerFinished(RMRegistryOperationsService.java:146)
> at
> org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService.handleAppAttemptEvent(RMRegistryService.java:151)
> at
> org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService$AppEventHandler.handle(RMRegistryService.java:183)
> at
> org.apache.hadoop.yarn.server.resourcemanager.registry.RMRegistryService$AppEventHandler.handle(RMRegistryService.java:177)
> at
> org.apache.hadoop.yarn.event.AsyncDispatcher$MultiListenerHandler.handle(AsyncDispatcher.java:276)
> at
> org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:183)
> at
> org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:109)
> at java.lang.Thread.run(Thread.java:745)
> 2017-05-16 03:32:36,898 INFO zookeeper.ClientCnxn (ClientCnxn.java:run(524))
> - EventThread shut down
> 2017-05-16 03:32:36,898 INFO zookeeper.ZooKeeper (ZooKeeper.java:close(684))
> - Session: 0x15b8703e986b750 closed
> 2017-05-16 03:32:36,898 INFO capacity.ParentQueue
> (ParentQueue.java:completedContainer(623)) - completedContainer queue=high
> usedCapacity=0.41496983 absoluteUsedCapacity=0.29047886 used=<memory:3469312,
> vCores:847> cluster=<memory:11943424, vCores:6064>
> 2017-05-16 03:32:36,905 INFO capacity.ParentQueue
> (ParentQueue.java:completedContainer(640)) - Re-sorting completed queue:
> root.high.lawful stats: lawful: capacity=0.3, absoluteCapacity=0.21000001,
> usedResources=<memory:417792, vCores:102>, usedCapacity=0.16657583,
> absoluteUsedCapacity=0.034980923, numApps=19, numContainers=102
> 2017-05-16 03:32:36,905 INFO capacity.ParentQueue
> (ParentQueue.java:completedContainer(623)) - completedContainer queue=root
> usedCapacity=0.41565567 absoluteUsedCapacity=0.41565567 used=<memory:4964352,
> vCores:1212> cluster=<memory:11943424, vCores:6064>
> 2017-05-16 03:32:36,906 INFO capacity.ParentQueue
> (ParentQueue.java:completedContainer(640)) - Re-sorting completed queue:
> root.high stats: high: numChildQueue= 4, capacity=0.7, absoluteCapacity=0.7,
> usedResources=<memory:3469312, vCores:847>usedCapacity=0.41496983,
> numApps=61, numContainers=847
> 2017-05-16 03:32:36,906 INFO capacity.CapacityScheduler
> (CapacityScheduler.java:completedContainer(1562)) - Application attempt
> appattempt_1494886223429_7023_000001 released container
> container_e43_1494886223429_7023_01_000043 on node: host:
> r13d8.hadoop.log10.blackberry:45454 #containers=1 available=<memory:36864,
> vCores:23> used=<memory:4096, vCores:1> with event: FINISHED
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]