Hello all-

(I sent this email to the torque list, but I'm wondering if it might be a maui 
problem).

We are trying to use procs= and pmem= on an 18-node (152-core) cluster with 
nodes of various memory sizes. pbsnodes shows the correct memory complement for 
each node, so apparently PBS is getting the right specs (see the output of 
pbsnodes below for more information). If we use the following settings in the 
PBS script, torque/maui will invariably try to fill up all 8 of the 8 cores 
of each node, even though there is nowhere near enough memory on any of 
these nodes for 8*3700mb=29600mb. Considering the physical memory limit ranges 
from 8GB to 24GB depending upon the node, this is just taking down nodes left 
and right.

Below I have provided a small example along with the associated output. I also 
provided the output for pbsnodes in case there is something I am missing here.

Thanks for your help!  -Lance

torque version: tried 2.5.4, 2.5.8, and 3.0.2 - all exhibit the same problem.
maui version: 3.2.6p21 (also tried maui 3.3.1, but it fails completely with 
the procs option: it only asks for a single CPU)

$ cat tmp.pbs
#!/bin/bash
#PBS -S /bin/bash
#PBS -l procs=24
#PBS -l pmem=3700mb
#PBS -l walltime=6:00:00 
#PBS -j oe

cat $PBS_NODEFILE

$ qsub tmp.pbs
337003.XXXX
$ wc -l tmp.pbs.o337003
24 tmp.pbs.o337003
$ cat tmp.pbs.o337003
compute-0-14
compute-0-14
compute-0-14
compute-0-14
compute-0-14
compute-0-14
compute-0-14
compute-0-14
compute-0-15
compute-0-15
compute-0-15
compute-0-15
compute-0-15
compute-0-15
compute-0-15
compute-0-15
compute-0-16
compute-0-16
compute-0-16
compute-0-16
compute-0-16
compute-0-16
compute-0-16
compute-0-16

$ pbsnodes -a
compute-0-16
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219085,varattr=,jobs=,state=free,netload=1834011936,gres=,loadave=0.00,ncpus=8,physmem=8177300kb,availmem=10095652kb,totmem=10225576kb,idletime=5582,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-16.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-15
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=700017694,gres=,loadave=0.00,ncpus=8,physmem=8177300kb,availmem=10150996kb,totmem=10225576kb,idletime=5606,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-15.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-14
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=1003164957,gres=,loadave=0.00,ncpus=8,physmem=8177300kb,availmem=10131180kb,totmem=10225576kb,idletime=5615,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-14.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-13
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=1173266470,gres=,loadave=0.00,ncpus=8,physmem=8177300kb,availmem=10132104kb,totmem=10225576kb,idletime=5637,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-13.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-12
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=3991477,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14276448kb,totmem=14350232kb,idletime=5604,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-12.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-11
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2947879,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14274604kb,totmem=14350232kb,idletime=5588,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-11.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-9
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=3721396,gres=,loadave=0.05,ncpus=8,physmem=12301956kb,availmem=14253816kb,totmem=14350232kb,idletime=5660,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-9.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-8
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2934478,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14254796kb,totmem=14350232kb,idletime=5675,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-8.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-7
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2909406,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14254812kb,totmem=14350232kb,idletime=5489,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-7.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-6
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2936791,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14275644kb,totmem=14350232kb,idletime=5748,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-6.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-5
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2966183,gres=,loadave=0.00,ncpus=8,physmem=12301956kb,availmem=14276260kb,totmem=14350232kb,idletime=5695,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-5.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-4
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2886627,gres=,loadave=0.00,ncpus=8,physmem=16438900kb,availmem=18412332kb,totmem=18487176kb,idletime=5634,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-4.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-3
   state = free
   np = 8
   properties = lustre
   ntype = cluster
   status = 
rectime=1319219108,varattr=,jobs=,state=free,netload=436527254,gres=,loadave=0.00,ncpus=8,physmem=24688212kb,availmem=26636656kb,totmem=26736488kb,idletime=2224,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-3.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-2
   state = free
   np = 8
   properties = lustre
   ntype = cluster
   status = 
rectime=1319219106,varattr=,jobs=,state=free,netload=1184385,gres=,loadave=0.00,ncpus=8,physmem=24688212kb,availmem=26659668kb,totmem=26736488kb,idletime=2223,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-2.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-1
   state = free
   np = 8
   properties = lustre
   ntype = cluster
   status = 
rectime=1319219102,varattr=,jobs=,state=free,netload=1258074,gres=,loadave=0.00,ncpus=8,physmem=24688212kb,availmem=26657304kb,totmem=26736488kb,idletime=2228,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-1.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-0
   state = free
   np = 8
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=3416356,gres=,loadave=0.00,ncpus=8,physmem=24688212kb,availmem=26635624kb,totmem=26736488kb,idletime=5603,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-0.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 
20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0

compute-0-10
   state = free
   np = 2
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=283846193,gres=,loadave=0.23,ncpus=8,physmem=12301956kb,availmem=13762696kb,totmem=14350232kb,idletime=5622,nusers=1,nsessions=1,sessions=3410,uname=Linux
 compute-0-10.local 2.6.18-274.7.1.el5 #1 SMP Thu Oct 20 16:21:01 EDT 2011 
x86_64,opsys=linux
   gpus = 0

compute-0-17
   state = free
   np = 8
   properties = testbox
   ntype = cluster
   status = 
rectime=1319219090,varattr=,jobs=,state=free,netload=2948331,gres=,loadave=0.00,ncpus=8,physmem=8177300kb,availmem=10144432kb,totmem=10225576kb,idletime=5558,nusers=0,nsessions=?
 0,sessions=? 0,uname=Linux compute-0-17.local 2.6.18-274.7.1.el5 #1 SMP Thu 
Oct 20 16:21:01 EDT 2011 x86_64,opsys=linux
   gpus = 0


_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers

Reply via email to