> Fernando Caba wrote:
> > Hi Gus, here are the results of all commands you mention:
> >
> > [root@fe ~]# qmgr -c 'p s'
> > #
> > # Create queues and set their attributes.
> > #
> > #
> > # Create and define queue batch
> > #
> > create queue batch
> > set queue batch queue_type = Execution
> > set queue batch resources_default.nodes = 1
> > set queue batch resources_default.walltime = 2400:00:00
> > set queue batch enabled = True
> > set queue batch started = True
> > #
> > # Set server attributes.
> > #
> > set server scheduling = True
> > set server acl_hosts = fe
> > set server managers = root@fe
> > set server operators = root@fe
> > set server default_queue = batch
> > set server log_events = 511
> > set server mail_from = adm
> > set server scheduler_iteration = 600
> > set server node_check_rate = 150
> > set server tcp_timeout = 6
> > set server mom_job_sync = True
> > set server keep_completed = 300
> > set server auto_node_np = True
> > set server next_job_number = 182
> > set server record_job_info = True
> > [root@fe ~]#
> >
> >
> > ${TORQUE}/bin/pbsnodes
> >
> > [root@fe ~]# pbsnodes
> > n10
> > state = free
> > np = 12
> > ntype = cluster
> > jobs = 0/121.fe
> > status = rectime=1317298640,varattr=,jobs=121.fe,state=free,netload=261129374581,gres=,loadave=4.00,ncpus=12,physmem=16360208kb,availmem=62484756kb,totmem=83471736kb,idletime=63369,nusers=2,nsessions=2,sessions=4394 8087,uname=Linux n10 2.6.18-194.el5 #1 SMP Fri Apr 2 14:58:14 EDT 2010 x86_64,opsys=linux
> > mom_service_port = 15002
> > mom_manager_port = 15003
> > gpus = 0
> >
> > n11
> > state = free
> > np = 12
> > ntype = cluster
> > jobs = 0/143.fe
> > status = rectime=1317298637,varattr=,jobs=143.fe,state=free,netload=12864227236,gres=,loadave=8.00,ncpus=12,physmem=16360208kb,availmem=78708424kb,totmem=83469060kb,idletime=1354314,nusers=2,nsessions=2,sessions=4583 20253,uname=Linux n11 2.6.18-194.el5 #1 SMP Fri Apr 2 14:58:14 EDT 2010 x86_64,opsys=linux
> > mom_service_port = 15002
> > mom_manager_port = 15003
> > gpus = 0
> >
> > n12
> > state = free
> > np = 12
> > ntype = cluster
> > jobs = 0/144.fe
> > status = rectime=1317298647,varattr=,jobs=144.fe,state=free,netload=953102292987,gres=,loadave=8.01,ncpus=12,physmem=16360208kb,availmem=78740696kb,totmem=83469060kb,idletime=1168354,nusers=2,nsessions=2,sessions=4635 20289,uname=Linux n12 2.6.18-194.el5 #1 SMP Fri Apr 2 14:58:14 EDT 2010 x86_64,opsys=linux
> > mom_service_port = 15002
> > mom_manager_port = 15003
> > gpus = 0
> >
> > n13
> > state = free
> > np = 12
> > ntype = cluster
> > jobs = 0/181.fe
> > status = rectime=1317298672,varattr=,jobs=181.fe,state=free,netload=1010169147229,gres=,loadave=4.00,ncpus=12,physmem=15955108kb,availmem=81150100kb,totmem=83066636kb,idletime=138726,nusers=2,nsessions=2,sessions=4407 29186,uname=Linux n13 2.6.18-194.el5xen #1 SMP Fri Apr 2 15:34:40 EDT 2010 x86_64,opsys=linux
> > mom_service_port = 15002
> > mom_manager_port = 15003
> > gpus = 0
> >
> > [root@fe ~]#
> >
> > ${MAUI}/bin/showconfig
> >
> > [root@fe ~]# which showconfig
> > /usr/local/maui/bin/showconfig
> > [root@fe ~]# showconfig
> > # Maui version 3.3.1 (PID: 18407)
> > # global policies
> >
> > REJECTNEGPRIOJOBS[0] FALSE
> > ENABLENEGJOBPRIORITY[0] FALSE
> > ENABLEMULTINODEJOBS[0] TRUE
> > ENABLEMULTIREQJOBS[0] FALSE
> > BFPRIORITYPOLICY[0] [NONE]
> > JOBPRIOACCRUALPOLICY QUEUEPOLICY
> > NODELOADPOLICY ADJUSTSTATE
> > USEMACHINESPEEDFORFS FALSE
> > USEMACHINESPEED FALSE
> > USESYSTEMQUEUETIME TRUE
> > USELOCALMACHINEPRIORITY FALSE
> > NODEUNTRACKEDLOADFACTOR 1.2
> > JOBNODEMATCHPOLICY[0] EXACTNODE
> >
> > JOBMAXSTARTTIME[0] INFINITY
> >
> > METAMAXTASKS[0] 0
> > NODESETPOLICY[0] [NONE]
> > NODESETATTRIBUTE[0] [NONE]
> > NODESETLIST[0]
> > NODESETDELAY[0] 00:00:00
> > NODESETPRIORITYTYPE[0] MINLOSS
> > NODESETTOLERANCE[0] 0.00
> >
> > BACKFILLPOLICY[0] FIRSTFIT
> > BACKFILLDEPTH[0] 0
> > BACKFILLPROCFACTOR[0] 0
> > BACKFILLMAXSCHEDULES[0] 10000
> > BACKFILLMETRIC[0] PROCS
> >
> > BFCHUNKDURATION[0] 00:00:00
> > BFCHUNKSIZE[0] 0
> > PREEMPTPOLICY[0] REQUEUE
> > MINADMINSTIME[0] 00:00:00
> > RESOURCELIMITPOLICY[0]
> > NODEAVAILABILITYPOLICY[0] COMBINED:[DEFAULT]
> > NODEALLOCATIONPOLICY[0] MINRESOURCE
> > TASKDISTRIBUTIONPOLICY[0] DEFAULT
> > RESERVATIONPOLICY[0] CURRENTHIGHEST
> > RESERVATIONRETRYTIME[0] 00:00:00
> > RESERVATIONTHRESHOLDTYPE[0] NONE
> > RESERVATIONTHRESHOLDVALUE[0] 0
> >
> > FSPOLICY [NONE]
> > FSPOLICY [NONE]
> > FSINTERVAL 12:00:00
> > FSDEPTH 8
> > FSDECAY 1.00
> >
> >
> >
> > # Priority Weights
> >
> > SERVICEWEIGHT[0] 1
> > TARGETWEIGHT[0] 1
> > CREDWEIGHT[0] 1
> > ATTRWEIGHT[0] 1
> > FSWEIGHT[0] 1
> > RESWEIGHT[0] 1
> > USAGEWEIGHT[0] 1
> > QUEUETIMEWEIGHT[0] 1
> > XFACTORWEIGHT[0] 0
> > SPVIOLATIONWEIGHT[0] 0
> > BYPASSWEIGHT[0] 0
> > TARGETQUEUETIMEWEIGHT[0] 0
> > TARGETXFACTORWEIGHT[0] 0
> > USERWEIGHT[0] 0
> > GROUPWEIGHT[0] 0
> > ACCOUNTWEIGHT[0] 0
> > QOSWEIGHT[0] 0
> > CLASSWEIGHT[0] 0
> > FSUSERWEIGHT[0] 0
> > FSGROUPWEIGHT[0] 0
> > FSACCOUNTWEIGHT[0] 0
> > FSQOSWEIGHT[0] 0
> > FSCLASSWEIGHT[0] 0
> > ATTRATTRWEIGHT[0] 0
> > ATTRSTATEWEIGHT[0] 0
> > NODEWEIGHT[0] 0
> > PROCWEIGHT[0] 0
> > MEMWEIGHT[0] 0
> > SWAPWEIGHT[0] 0
> > DISKWEIGHT[0] 0
> > PSWEIGHT[0] 0
> > PEWEIGHT[0] 0
> > WALLTIMEWEIGHT[0] 0
> > UPROCWEIGHT[0] 0
> > UJOBWEIGHT[0] 0
> > CONSUMEDWEIGHT[0] 0
> > USAGEEXECUTIONTIMEWEIGHT[0] 0
> > REMAININGWEIGHT[0] 0
> > PERCENTWEIGHT[0] 0
> > XFMINWCLIMIT[0] 00:02:00
> >
> >
> > # partition DEFAULT policies
> >
> > REJECTNEGPRIOJOBS[1] FALSE
> > ENABLENEGJOBPRIORITY[1] FALSE
> > ENABLEMULTINODEJOBS[1] TRUE
> > ENABLEMULTIREQJOBS[1] FALSE
> > BFPRIORITYPOLICY[1] [NONE]
> > JOBPRIOACCRUALPOLICY QUEUEPOLICY
> > NODELOADPOLICY ADJUSTSTATE
> > JOBNODEMATCHPOLICY[1]
> >
> > JOBMAXSTARTTIME[1] INFINITY
> >
> > METAMAXTASKS[1] 0
> > NODESETPOLICY[1] [NONE]
> > NODESETATTRIBUTE[1] [NONE]
> > NODESETLIST[1]
> > NODESETDELAY[1] 00:00:00
> > NODESETPRIORITYTYPE[1] MINLOSS
> > NODESETTOLERANCE[1] 0.00
> >
> > # Priority Weights
> >
> > XFMINWCLIMIT[1] 00:00:00
> >
> > RMAUTHTYPE[0] CHECKSUM
> >
> > CLASSCFG[[NONE]] DEFAULT.FEATURES=[NONE]
> > CLASSCFG[[ALL]] DEFAULT.FEATURES=[NONE]
> > CLASSCFG[batch] DEFAULT.FEATURES=[NONE]
> > QOSPRIORITY[0] 0
> > QOSQTWEIGHT[0] 0
> > QOSXFWEIGHT[0] 0
> > QOSTARGETXF[0] 0.00
> > QOSTARGETQT[0] 00:00:00
> > QOSFLAGS[0]
> > QOSPRIORITY[1] 0
> > QOSQTWEIGHT[1] 0
> > QOSXFWEIGHT[1] 0
> > QOSTARGETXF[1] 0.00
> > QOSTARGETQT[1] 00:00:00
> > QOSFLAGS[1]
> > # SERVER MODULES: MX
> > SERVERMODE NORMAL
> > SERVERNAME
> > SERVERHOST fe
> > SERVERPORT 42559
> > LOGFILE maui.log
> > LOGFILEMAXSIZE 10000000
> > LOGFILEROLLDEPTH 1
> > LOGLEVEL 3
> > LOGFACILITY fALL
> > SERVERHOMEDIR /usr/local/maui/
> > TOOLSDIR /usr/local/maui/tools/
> > LOGDIR /usr/local/maui/log/
> > STATDIR /usr/local/maui/stats/
> > LOCKFILE /usr/local/maui/maui.pid
> > SERVERCONFIGFILE /usr/local/maui/maui.cfg
> > CHECKPOINTFILE /usr/local/maui/maui.ck
> > CHECKPOINTINTERVAL 00:05:00
> > CHECKPOINTEXPIRATIONTIME 3:11:20:00
> > TRAPJOB
> > TRAPNODE
> > TRAPFUNCTION
> > RESDEPTH 24
> >
> > RMPOLLINTERVAL 00:00:30
> > NODEACCESSPOLICY SHARED
> > ALLOCLOCALITYPOLICY [NONE]
> > SIMTIMEPOLICY [NONE]
> > ADMIN1 root
> > ADMINHOSTS ALL
> > NODEPOLLFREQUENCY 0
> > DISPLAYFLAGS
> > DEFAULTDOMAIN
> > DEFAULTCLASSLIST [DEFAULT:1]
> > FEATURENODETYPEHEADER
> > FEATUREPROCSPEEDHEADER
> > FEATUREPARTITIONHEADER
> > DEFERTIME 1:00:00
> > DEFERCOUNT 24
> > DEFERSTARTCOUNT 1
> > JOBPURGETIME 0
> > NODEPURGETIME 2140000000
> > APIFAILURETHRESHHOLD 6
> > NODESYNCTIME 600
> > JOBSYNCTIME 600
> > JOBMAXOVERRUN 00:10:00
> > NODEMAXLOAD 0.0
> >
> > PLOTMINTIME 120
> > PLOTMAXTIME 245760
> > PLOTTIMESCALE 11
> > PLOTMINPROC 1
> > PLOTMAXPROC 512
> > PLOTPROCSCALE 9
> > SCHEDCFG[] MODE=NORMAL SERVER=fe:42559
> > # RM MODULES: PBS SSS WIKI NATIVE
> > RMCFG[FE] AUTHTYPE=CHECKSUM EPORT=15004 TIMEOUT=00:00:09 TYPE=PBS
> > SIMWORKLOADTRACEFILE workload
> > SIMRESOURCETRACEFILE resource
> > SIMAUTOSHUTDOWN OFF
> > SIMSTARTTIME 0
> > SIMSCALEJOBRUNTIME FALSE
> > SIMFLAGS
> > SIMJOBSUBMISSIONPOLICY CONSTANTJOBDEPTH
> > SIMINITIALQUEUEDEPTH 16
> > SIMWCACCURACY 0.00
> > SIMWCACCURACYCHANGE 0.00
> > SIMNODECOUNT 0
> > SIMNODECONFIGURATION NORMAL
> > SIMWCSCALINGPERCENT 100
> > SIMCOMRATE 0.10
> > SIMCOMTYPE ROUNDROBIN
> > COMINTRAFRAMECOST 0.30
> > COMINTERFRAMECOST 0.30
> > SIMSTOPITERATION -1
> > SIMEXITITERATION -1
> >
> >
> >
> > [root@fe ~]# ps -ef |grep maui
> > root 18407 1 0 Sep28 ? 00:00:04 /usr/local/maui/sbin/maui
> > root 22527 22463 0 09:19 pts/2 00:00:00 grep maui
> > [root@fe ~]# service maui status
> > maui (pid 18407) is running...
> > [root@fe ~]# service pbs_server status
> > pbs_server (pid 4147) is running...
> > [root@fe ~]#
> >
> > service pbs_sched status [just in case it is also running ...]
> > service pbs_mom status
> > service pbs status
> >
> > None of those three services is installed.
> >
> > Thank you very much
> >
>
When you submit a job with -l nodes=1:ppn=12 to a node, what is the
output of pbsnodes for that node?
Does it show that all 12 cores are in use?
And when you run qstat -f on that job, does it show that the job is
really using 12 cores?
> > ----------------------------------------------------
> > Ing. Fernando Caba
> > Director General de Telecomunicaciones
> > Universidad Nacional del Sur
> > http://www.dgt.uns.edu.ar
> > Tel/Fax: (54)-291-4595166
> > Tel: (54)-291-4595101 int. 2050
> > Avda. Alem 1253, (B8000CPB) Bahía Blanca - Argentina
> > ----------------------------------------------------
> >
>
cheers,
--
Denis Anjos,
www.versatushpc.com.br
_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers