Hi Guys,

I have set up a new Slurm 15.08 cluster and can't seem to start more
than one job at a time on a compute node.  I do have the
SelectType=select/cons_res option set in my slurm.conf, but here is
what I am seeing.

I am able to start my first interactive job by running:

srun -u bash -i -w client1

The second one just gets queued:

$ srun -w client1 -u bash -i
srun: job 114 queued and waiting for resources

# squeue -l
Fri Mar 18 15:26:11 2016
             JOBID PARTITION     NAME     USER    STATE       TIME TIME_LIMI  NODES NODELIST(REASON)
               114  slurmdev     bash   jagga  PENDING       0:00 UNLIMITED      1 (Resources)
               113  slurmdev     bash   jagga  RUNNING       9:26 UNLIMITED      1 client1


Here is my slurm.conf file:
--
ClusterName=slurmdev
ControlMachine=slurmdev01
ControlAddr=x.x.x.x
SlurmUser=slurm
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
StateSaveLocation=/var/spool/slurm
SlurmdSpoolDir=/var/spool/slurm/slurmd
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/pgid
CacheGroups=0
ReturnToService=2
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
SchedulerType=sched/backfill
SelectType=select/cons_res
SelectTypeParameters=CR_CPU_Memory
SchedulerParameters=kill_invalid_depend
FastSchedule=1
SlurmctldDebug=3
SlurmdDebug=3
JobCompType=jobcomp/none
AccountingStorageEnforce=associations
AccountingStorageHost=slurmdev01.gene.com
AccountingStorageLoc=/var/log/slurm_acct.log
AccountingStorageType=accounting_storage/slurmdbd
AccountingStoreJobComment=YES
NodeName=client[1-10] CPUs=40 RealMemory=257680 Sockets=2 CoresPerSocket=10 ThreadsPerCore=2 State=UNKNOWN
PartitionName=slurmdev Nodes=client[1-10] Default=YES MaxTime=INFINITE State=UP
--

# scontrol show job 114
JobId=114 JobName=bash
   UserId=jagga(7229) GroupId=jagga(1984)
   Priority=4294901710 Nice=0 Account=research QOS=normal
   JobState=PENDING Reason=Resources Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
   RunTime=00:00:00 TimeLimit=UNLIMITED TimeMin=N/A
   SubmitTime=2016-03-18T15:17:11 EligibleTime=2016-03-18T15:17:11
   StartTime=2017-03-18T15:16:45 EndTime=Unknown
   PreemptTime=None SuspendTime=None SecsPreSuspend=0
   Partition=slurmdev AllocNode:Sid=slurmdev01:13348
   ReqNodeList=client1 ExcNodeList=(null)
   NodeList=(null)
   NumNodes=1 NumCPUs=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
   TRES=cpu=1,node=1
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=1 MinMemoryNode=0 MinTmpDiskNode=0
   Features=(null) Gres=(null) Reservation=(null)
   Shared=OK Contiguous=0 Licenses=(null) Network=(null)
   Command=bash
   WorkDir=/home/jagga
   Power= SICP=0

Any help would be appreciated!

Thanks.

Reply via email to