[
https://issues.apache.org/jira/browse/YARN-7880?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jiandan Yang updated YARN-7880:
--------------------------------
Description:
sls test case: node count = 9000, job count=10k,task num of job = 500, task run
time = 100s, but it does not occur when node count = 500 and 2000.
{code}
18/02/02 20:54:28 INFO rmcontainer.RMContainerImpl:
container_1517575125794_5707_01_000086 Container Transitioned from ACQUIRED to
RUNNING
java.lang.NullPointerException
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.commonCheckContainerAllocation(FiCaSchedulerApp.java:324)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.accept(FiCaSchedulerApp.java:420)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.tryCommit(CapacityScheduler.java:2506)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$ResourceCommitterService.run(CapacityScheduler.java:541)
{code}
some CapacityScheduler$AsyncScheduleThread also throws NPE
{code}
java.lang.NullPointerException
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.getResourceRequests(SchedulerApplicationAttempt.java:1341)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.canAssign(RegularContainerAllocator.java:302)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignOffSwitchContainers(RegularContainerAllocator.java:389)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainersOnNode(RegularContainerAllocator.java:470)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.tryAllocateOnNode(RegularContainerAllocator.java:252)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.allocate(RegularContainerAllocator.java:816)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainers(RegularContainerAllocator.java:854)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator.assignContainers(ContainerAllocator.java:54)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.assignContainers(FiCaSchedulerApp.java:856)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.assignContainers(LeafQueue.java:1111)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainersToChildQueues(ParentQueue.java:735)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainers(ParentQueue.java:559)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateOrReserveNewContainers(CapacityScheduler.java:1343)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainerOnSingleNode(CapacityScheduler.java:1337)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1434)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1199)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.schedule(CapacityScheduler.java:474)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$AsyncScheduleThread.run(CapacityScheduler.java:501)
{code}
was:
sls test case: node count = 9000, job count=10k,task num of job = 500, task run
time = 100s
{code}
18/02/02 20:54:28 INFO rmcontainer.RMContainerImpl:
container_1517575125794_5707_01_000086 Container Transitioned from ACQUIRED to
RUNNING
java.lang.NullPointerException
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.commonCheckContainerAllocation(FiCaSchedulerApp.java:324)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.accept(FiCaSchedulerApp.java:420)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.tryCommit(CapacityScheduler.java:2506)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$ResourceCommitterService.run(CapacityScheduler.java:541)
{code}
some CapacityScheduler$AsyncScheduleThread also throws NPE
{code}
java.lang.NullPointerException
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.getResourceRequests(SchedulerApplicationAttempt.java:1341)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.canAssign(RegularContainerAllocator.java:302)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignOffSwitchContainers(RegularContainerAllocator.java:389)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainersOnNode(RegularContainerAllocator.java:470)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.tryAllocateOnNode(RegularContainerAllocator.java:252)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.allocate(RegularContainerAllocator.java:816)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainers(RegularContainerAllocator.java:854)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator.assignContainers(ContainerAllocator.java:54)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.assignContainers(FiCaSchedulerApp.java:856)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.assignContainers(LeafQueue.java:1111)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainersToChildQueues(ParentQueue.java:735)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainers(ParentQueue.java:559)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateOrReserveNewContainers(CapacityScheduler.java:1343)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainerOnSingleNode(CapacityScheduler.java:1337)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1434)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1199)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.schedule(CapacityScheduler.java:474)
at
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$AsyncScheduleThread.run(CapacityScheduler.java:501)
{code}
> CapacityScheduler$ResourceCommitterService throws NPE when running sls
> ----------------------------------------------------------------------
>
> Key: YARN-7880
> URL: https://issues.apache.org/jira/browse/YARN-7880
> Project: Hadoop YARN
> Issue Type: Bug
> Components: yarn
> Affects Versions: 3.0.0
> Reporter: Jiandan Yang
> Priority: Major
>
> sls test case: node count = 9000, job count=10k,task num of job = 500, task
> run time = 100s, but it does not occur when node count = 500 and 2000.
> {code}
> 18/02/02 20:54:28 INFO rmcontainer.RMContainerImpl:
> container_1517575125794_5707_01_000086 Container Transitioned from ACQUIRED
> to RUNNING
> java.lang.NullPointerException
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.commonCheckContainerAllocation(FiCaSchedulerApp.java:324)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.accept(FiCaSchedulerApp.java:420)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.tryCommit(CapacityScheduler.java:2506)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$ResourceCommitterService.run(CapacityScheduler.java:541)
> {code}
> some CapacityScheduler$AsyncScheduleThread also throws NPE
> {code}
> java.lang.NullPointerException
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.getResourceRequests(SchedulerApplicationAttempt.java:1341)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.canAssign(RegularContainerAllocator.java:302)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignOffSwitchContainers(RegularContainerAllocator.java:389)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainersOnNode(RegularContainerAllocator.java:470)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.tryAllocateOnNode(RegularContainerAllocator.java:252)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.allocate(RegularContainerAllocator.java:816)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainers(RegularContainerAllocator.java:854)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator.assignContainers(ContainerAllocator.java:54)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.assignContainers(FiCaSchedulerApp.java:856)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.assignContainers(LeafQueue.java:1111)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainersToChildQueues(ParentQueue.java:735)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue.assignContainers(ParentQueue.java:559)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateOrReserveNewContainers(CapacityScheduler.java:1343)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainerOnSingleNode(CapacityScheduler.java:1337)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1434)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(CapacityScheduler.java:1199)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.schedule(CapacityScheduler.java:474)
> at
> org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler$AsyncScheduleThread.run(CapacityScheduler.java:501)
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]