Hi! We have a user that submitted a job that did not start as expected.
He was using --exclusive together with --ntasks-per-node but ended up with all tasks on one node anyway.
----8<----
#SBATCH -n 16
#SBATCH --exclusive
#SBATCH --ntasks-per-node=8
----8<----
See the attached files for more information about how the job was submitted. We are currently running Slurm version 2.6.3.
Best regards,
Magnus
--
Magnus Jonsson, Developer, HPC2N, Umeå Universitet
JobId=1603907 Name=submit_e
UserId=magnus(2066) GroupId=folk(3001)
Priority=658834 Account=sysop QOS=normal
JobState=RUNNING Reason=None Dependency=(null)
Requeue=1 Restarts=0 BatchFlag=1 ExitCode=0:0
DerivedExitCode=0:0
RunTime=00:00:44 TimeLimit=00:30:00 TimeMin=N/A
SubmitTime=2014-02-19T09:03:10 EligibleTime=2014-02-19T09:03:10
StartTime=2014-02-19T09:05:45 EndTime=2014-02-19T09:35:45
PreemptTime=None SuspendTime=None SecsPreSuspend=0
Partition=devel AllocNode:Sid=t-mn01:14395
ReqNodeList=(null) ExcNodeList=(null)
NodeList=t-cn0304
BatchHost=t-cn0304
NumNodes=6 NumCPUs=48 CPUs/Task=1 ReqS:C:T=*:*:*
Nodes=t-cn0304 CPU_IDs=0-47 Mem=127200
MinCPUsNode=8 MinMemoryCPU=2650M MinTmpDiskNode=0
Features=(null) Gres=(null) Reservation=(null)
Shared=0 Contiguous=0 Licenses=(null) Network=(null)
Command=/pfs/nobackup/home/m/magnus/y/submit_e
WorkDir=/pfs/nobackup/home/m/magnus/y
BatchScript=
#!/bin/bash
#SBATCH -A sysop
#SBATCH -p devel
#SBATCH -o e.out
#SBATCH -n 16
#SBATCH --exclusive
#SBATCH --ntasks-per-node=8
scontrol show job -d -d $SLURM_JOBID
srun hostname
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
t-cn0304.hpc2n.umu.se
JobId=1603906 Name=submit
UserId=magnus(2066) GroupId=folk(3001)
Priority=658834 Account=sysop QOS=normal
JobState=RUNNING Reason=None Dependency=(null)
Requeue=1 Restarts=0 BatchFlag=1 ExitCode=0:0
DerivedExitCode=0:0
RunTime=00:00:01 TimeLimit=00:30:00 TimeMin=N/A
SubmitTime=2014-02-19T09:03:09 EligibleTime=2014-02-19T09:03:09
StartTime=2014-02-19T09:03:44 EndTime=2014-02-19T09:33:44
PreemptTime=None SuspendTime=None SecsPreSuspend=0
Partition=devel AllocNode:Sid=t-mn01:14395
ReqNodeList=(null) ExcNodeList=(null)
NodeList=t-cn[1015,1017]
BatchHost=t-cn1015
NumNodes=2 NumCPUs=24 CPUs/Task=1 ReqS:C:T=*:*:*
Nodes=t-cn[1015,1017] CPU_IDs=0-11 Mem=31800
MinCPUsNode=8 MinMemoryCPU=2650M MinTmpDiskNode=0
Features=(null) Gres=(null) Reservation=(null)
Shared=OK Contiguous=0 Licenses=(null) Network=(null)
Command=/pfs/nobackup/home/m/magnus/y/submit
WorkDir=/pfs/nobackup/home/m/magnus/y
BatchScript=
#!/bin/bash
#SBATCH -A sysop
#SBATCH -p devel
#SBATCH -o o.out
#SBATCH -n 16
#SBATCH --ntasks-per-node=8
scontrol show job -d -d $SLURM_JOBID
srun hostname
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1015.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
t-cn1017.hpc2n.umu.se
smime.p7s
Description: S/MIME Cryptographic Signature
