Hi Fernando

This is unlikely, but is there any chance that,
due to fragmentation over time,
the 4 idle processors exist in separate nodes?
You can check that with qtop (or pbsnodes).

Do jobs 5610 and 5613 request
processors in a single node?
Say:
#PBS -l nodes=1:ppn=4 .
If so, I wonder if Maui+Torque would start the job at all,
in case the 4 idle processors straddle more than one node.

In this case, maybe the user could try to launch the jobs with
#PBS -l nodes=1:ppn=1+1:ppn=1+1:ppn=1+1:ppn=1
(or, equivalently, #PBS -l nodes=4:ppn=1)
instead?

Just a guess,
Gus Correa

On 10/08/2013 10:29 AM, Fernando Caba wrote:
> Hi All, i have a cluster with maui 3.3.1 and torque 3.0.1. Cluster works
> fine.
> But the following situation has recently become very frequent: with free
> processors available (4 in this case), why don't jobs 5610 and 5613
> (4 processors each) enter the queue and start running?
> It's obvious that I have free resources (4 processors), yet the users
> complain that their jobs stay deferred (or idle) instead of executing.
>
> [root@cluster]# showq
> ACTIVE JOBS--------------------
> JOBNAME USERNAME STATE PROC REMAINING STARTTIME
>
> 5410 ricardo Running 4 INFINITY Mon Sep 23 11:35:22
> 5463 sol Running 4 INFINITY Fri Sep 27 19:11:06
> 5464 sol Running 4 INFINITY Fri Sep 27 19:11:24
> 5535 carolina Running 4 INFINITY Tue Oct 1 11:07:33
> 5536 nicolas Running 4 INFINITY Tue Oct 1 12:46:10
> 5559 mmarta Running 4 INFINITY Fri Oct 4 10:06:34
> 5560 mmarta Running 4 INFINITY Fri Oct 4 10:09:23
> 5568 leandro Running 4 INFINITY Fri Oct 4 15:57:36
> 5579 silvia Running 12 INFINITY Sat Oct 5 22:40:39
> 5586 carolina Running 4 INFINITY Sun Oct 6 18:36:15
> 5588 walter Running 4 INFINITY Sun Oct 6 23:35:03
> 5592 patricia Running 4 INFINITY Mon Oct 7 09:03:27
> 5594 mmarta Running 4 INFINITY Mon Oct 7 11:00:57
> 5600 ricardo Running 4 INFINITY Mon Oct 7 17:50:09
> 5601 carolina Running 4 INFINITY Mon Oct 7 18:28:48
> 5602 nicolas Running 4 INFINITY Mon Oct 7 20:43:33
> 5605 nicolas Running 4 INFINITY Tue Oct 8 00:14:51
> 5606 gabriela Running 4 INFINITY Tue Oct 8 06:17:47
> 5607 gabriela Running 4 INFINITY Tue Oct 8 06:21:50
> 5608 gabriela Running 4 INFINITY Tue Oct 8 06:25:26
> 5609 cecilia Running 4 INFINITY Tue Oct 8 06:29:13
>
> 21 Active Jobs 92 of 96 Processors Active (95.83%)
> 8 of 8 Nodes Active (100.00%)
>
> IDLE JOBS----------------------
> JOBNAME USERNAME STATE PROC WCLIMIT QUEUETIME
>
>
> 0 Idle Jobs
>
> BLOCKED JOBS----------------
> JOBNAME USERNAME STATE PROC WCLIMIT QUEUETIME
>
> 5584 patricia Deferred 12 INFINITY Sun Oct 6 11:02:55
> 5610 cecilia Deferred 4 INFINITY Tue Oct 8 06:30:26
> 5613 patricia Deferred 4 INFINITY Tue Oct 8 11:06:19
>
> Total Jobs: 24 Active Jobs: 21 Idle Jobs: 0 Blocked Jobs: 3
> [root@cluster]#
>
> this is my maui.cfg file:
>
> # maui.cfg 3.3.1
>
> SERVERHOST fe
>
> #SERVERHOST castellani13.fisica.uns.edu.ar
>
> # primary admin must be first in list
> ADMIN1 root
>
> # Resource Manager Definition
>
> RMCFG[FE] TYPE=PBS
>
> # Allocation Manager Definition
>
> AMCFG[bank] TYPE=NONE
>
> # full parameter docs at http://supercluster.org/mauidocs/a.fparameters.html
> # use the 'schedctl -l' command to display current configuration
>
> RMPOLLINTERVAL 00:00:30
>
> SERVERPORT 42559
> SERVERMODE NORMAL
>
> # Admin: http://supercluster.org/mauidocs/a.esecurity.html
>
>
> LOGFILE maui.log
> LOGFILEMAXSIZE 10000000
> LOGLEVEL 3
>
> # Job Priority: http://supercluster.org/mauidocs/5.1jobprioritization.html
>
> QUEUETIMEWEIGHT 1
>
> # FairShare: http://supercluster.org/mauidocs/6.3fairshare.html
>
> #FSPOLICY PSDEDICATED
> #FSDEPTH 7
> #FSINTERVAL 86400
> #FSDECAY 0.80
>
> # Throttling Policies: http://supercluster.org/mauidocs/6.2throttlingpolicies.html
>
> # NONE SPECIFIED
>
> # Backfill: http://supercluster.org/mauidocs/8.2backfill.html
>
> BACKFILLPOLICY FIRSTFIT
> RESERVATIONPOLICY CURRENTHIGHEST
>
> # Node Allocation: http://supercluster.org/mauidocs/5.2nodeallocation.html
>
> NODEALLOCATIONPOLICY MINRESOURCE
>
> # QOS: http://supercluster.org/mauidocs/7.3qos.html
>
> # QOSCFG[hi] PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
> # QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE
>
> # Standing Reservations: http://supercluster.org/mauidocs/7.1.3standingreservations.html
>
> # SRSTARTTIME[test] 8:00:00
> # SRENDTIME[test] 17:00:00
> # SRDAYS[test] MON TUE WED THU FRI
> # SRTASKCOUNT[test] 20
> # SRMAXTIME[test] 0:30:00
>
> # Creds: http://supercluster.org/mauidocs/6.1fairnessoverview.html
>
> # USERCFG[DEFAULT] FSTARGET=25.0
> # USERCFG[john] PRIORITY=100 FSTARGET=10.0-
> # GROUPCFG[staff] PRIORITY=1000 QLIST=hi:low QDEF=hi
> # CLASSCFG[batch] FLAGS=PREEMPTEE
> # CLASSCFG[interactive] FLAGS=PREEMPTOR
>
> JOBNODEMATCHPOLICY EXACTPROC
> #JOBNODEMATCHPOLICY EXACTNODE
> # this line was in place when processors were being left idle
> [root@fe maui]# vi maui.cfg
> # maui.cfg 3.3.1
>
> SERVERHOST fe
>
> #SERVERHOST castellani13.fisica.uns.edu.ar
>
> # primary admin must be first in list
> ADMIN1 root
>
> # Resource Manager Definition
>
> RMCFG[FE] TYPE=PBS
>
> # Allocation Manager Definition
>
> AMCFG[bank] TYPE=NONE
>
> # full parameter docs at http://supercluster.org/mauidocs/a.fparameters.html
> # use the 'schedctl -l' command to display current configuration
>
> RMPOLLINTERVAL 00:00:30
>
> SERVERPORT 42559
> "maui.cfg" 80L, 2096C
>
> # QOSCFG[hi] PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
> # QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE
>
> # Standing Reservations: http://supercluster.org/mauidocs/7.1.3standingreservations.html
>
> # SRSTARTTIME[test] 8:00:00
> # SRENDTIME[test] 17:00:00
> # SRDAYS[test] MON TUE WED THU FRI
> # SRTASKCOUNT[test] 20
> # SRMAXTIME[test] 0:30:00
>
> # Creds: http://supercluster.org/mauidocs/6.1fairnessoverview.html
>
> # USERCFG[DEFAULT] FSTARGET=25.0
> # USERCFG[john] PRIORITY=100 FSTARGET=10.0-
> # GROUPCFG[staff] PRIORITY=1000 QLIST=hi:low QDEF=hi
> # CLASSCFG[batch] FLAGS=PREEMPTEE
> # CLASSCFG[interactive] FLAGS=PREEMPTOR
>
> JOBNODEMATCHPOLICY EXACTPROC
> #JOBNODEMATCHPOLICY EXACTNODE
> ~
>
>
> ------------------------------------------------
>
> And this is my queue configuration:
>
> #
> # Create queues and set their attributes.
> #
> #
> # Create and define queue batch
> #
> create queue batch
> set queue batch queue_type = Execution
> set queue batch resources_default.nodes = 4
> set queue batch resources_default.walltime = 4800:00:00
> set queue batch enabled = True
> set queue batch started = True
> #
> # Set server attributes.
> #
> set server scheduling = True
> set server acl_hosts = fe
> set server managers = root@fe
> set server operators = root@fe
> set server default_queue = batch
> set server log_events = 511
> set server mail_from = grumasica
> set server scheduler_iteration = 600
> set server node_check_rate = 150
> set server tcp_timeout = 6
> set server log_level = 7
> set server mom_job_sync = True
> set server mail_domain = uns.edu.ar
> set server keep_completed = 300
> set server auto_node_np = True
> set server next_job_number = 5614
> set server record_job_info = True
> set server record_job_script = True
>
> ----------------------------------
>
> Thanks!!!
>
>
> --
>
> *
> <http://www.uns.edu.ar> Universidad
> Nacional del Sur
> *
>
>       Ing. Fernando Caba
> Director General de Telecomunicaciones
> Avda. Alem 1253, (B8000CPB) Bahía Blanca - Argentina
> Tel/Fax: (54)-291-4595166
> Tel: (54)-291-4595101 int. 2050
> http://www.dgt.uns.edu.ar
>
>
>
> _______________________________________________
> mauiusers mailing list
> [email protected]
> http://www.supercluster.org/mailman/listinfo/mauiusers

_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers

Reply via email to