Michael,

We have actually moved to a larger cluster of 64 nodes (50 quad-core and 14 dual
Opteron nodes), therefore 220 processors are available.  We are
submitting a job that requires 64 threads, but we still get the same
result.  Here are the files you requested.  I have already posted to
the Torque users list.
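
For reference, the job is submitted with a plain qsub call along these lines
(the script file name here is only illustrative):

    qsub scaling_test.pbs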

####### PBS SCRIPT START #######

#!/bin/sh -f
#PBS -l nodes=64
#PBS -N scaling_test
#PBS -e scaling_test.err
#PBS -o scaling_test.log
#PBS -j oe
#PBS -l mem=64000mb
#PBS -m abe
#PBS -q parallel

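# NCPU = number of entries in the PBS nodefile, i.e. the processors allocated to this job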
NCPU=`wc -l < $PBS_NODEFILE`
echo ------------------------------------------------------
echo ' This job is allocated on '${NCPU}' cpu(s)'
echo 'Job is running on node(s): '
cat $PBS_NODEFILE
echo PBS: qsub is running on $PBS_O_HOST
echo PBS: originating queue is $PBS_O_QUEUE
echo PBS: executing queue is $PBS_QUEUE
echo PBS: working directory is $PBS_O_WORKDIR
echo PBS: execution mode is $PBS_ENVIRONMENT
echo PBS: job identifier is $PBS_JOBID
echo PBS: job name is $PBS_JOBNAME
echo PBS: node file is $PBS_NODEFILE
echo PBS: current home directory is $PBS_O_HOME
echo PBS: PATH = $PBS_O_PATH
echo ------------------------------------------------------
SERVER=$PBS_O_HOST
WORKDIR=$HOME/pbs/multi/scaling_test
cd ${WORKDIR}
cat $PBS_NODEFILE > nodes.list
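# Start the LAM/MPI runtime on the hosts listed in the PBS nodefile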
lamboot -s -H $PBS_NODEFILE
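# Launch one MPI process per nodefile entry with the parallel FDS binary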
mpirun -np $NCPU /opt/fds/fds5_mpi scaling_test.fds
lamhalt

####### PBS SCRIPT END #######

####### MAUI.CFG START #######
# maui.cfg 3.2.6p14

SERVERHOST              master.atar.senecac.on.ca
# primary admin must be first in list
ADMIN1                  root
ADMIN3                  nilesh.mistry  


# Resource Manager Definition

RMCFG[master.atar.senecac.on.ca] TYPE=PBS

# Allocation Manager Definition

AMCFG[bank]  TYPE=NONE

# full parameter docs at http://clusterresources.com/mauidocs/a.fparameters.html
# use the 'schedctl -l' command to display current configuration

RMPOLLINTERVAL  00:01:00

SERVERPORT            42559
SERVERMODE            NORMAL

# Admin: http://clusterresources.com/mauidocs/a.esecurity.html


LOGFILE               maui.log
LOGFILEMAXSIZE        10000000
LOGLEVEL              4
LOGFACILITY             fALL

# Job Priority: http://clusterresources.com/mauidocs/5.1jobprioritization.html

QUEUETIMEWEIGHT       1

# FairShare: http://clusterresources.com/mauidocs/6.3fairshare.html

#FSPOLICY              PSDEDICATED
#FSDEPTH               7
#FSINTERVAL            86400
#FSDECAY               0.80

# Throttling Policies: http://clusterresources.com/mauidocs/6.2throttlingpolicies.html

# NONE SPECIFIED

# Backfill: http://clusterresources.com/mauidocs/8.2backfill.html

BACKFILLPOLICY  ON
RESERVATIONPOLICY     CURRENTHIGHEST

# the following are modified/added by Mehrdad 13 Sept 07
#NODEACCESSPOLICY       DEDICATED
NODEACCESSPOLICY        SHARED
JOBNODEMATCHPOLICY   EXACTPROC

# Node Allocation: http://clusterresources.com/mauidocs/5.2nodeallocation.html

NODEALLOCATIONPOLICY  MINRESOURCE

# QOS: http://clusterresources.com/mauidocs/7.3qos.html

# QOSCFG[hi]  PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
# QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE

# Standing Reservations: http://clusterresources.com/mauidocs/7.1.3standingreservations.html

# SRSTARTTIME[test] 8:00:00
# SRENDTIME[test]   17:00:00
# SRDAYS[test]      MON TUE WED THU FRI
# SRTASKCOUNT[test] 20
# SRMAXTIME[test]   0:30:00

# Creds: http://clusterresources.com/mauidocs/6.1fairnessoverview.html

# USERCFG[DEFAULT]      FSTARGET=25.0
# USERCFG[john]         PRIORITY=100  FSTARGET=10.0-
# GROUPCFG[staff]       PRIORITY=1000 QLIST=hi:low QDEF=hi
# CLASSCFG[batch]       FLAGS=PREEMPTEE
# CLASSCFG[interactive] FLAGS=PREEMPTOR
USERCFG[DEFAULT]        MAXJOB=4
####### MAUI.CFG END #######

####### QMGR -c "PRINT SERVER MASTER" START #######
#
# Create queues and set their attributes.
#
#
# Create and define queue serial
#
create queue serial
set queue serial queue_type = Execution
set queue serial resources_max.cput = 1000:00:00
set queue serial resources_max.mem = 3000mb
set queue serial resources_max.ncpus = 1
set queue serial resources_max.nodect = 1
set queue serial resources_max.nodes = 1:ppn=1
set queue serial resources_max.walltime = 1000:00:00
set queue serial resources_default.cput = 336:00:00
set queue serial resources_default.mem = 900mb
set queue serial resources_default.ncpus = 1
set queue serial resources_default.nodect = 1
set queue serial resources_default.nodes = 1:ppn=1
set queue serial enabled = True
set queue serial started = True
#
# Create and define queue workq
#
create queue workq
set queue workq queue_type = Execution
set queue workq resources_max.cput = 10000:00:00
set queue workq resources_max.ncpus = 200
set queue workq resources_max.nodect = 64
set queue workq resources_max.nodes = 200:ppn=4
set queue workq resources_max.walltime = 10000:00:00
set queue workq resources_min.cput = 00:00:01
set queue workq resources_min.ncpus = 1
set queue workq resources_min.nodect = 1
set queue workq resources_min.walltime = 00:00:01
set queue workq resources_default.cput = 10000:00:00
set queue workq resources_default.nodect = 1
set queue workq resources_default.walltime = 10000:00:00
set queue workq enabled = True
set queue workq started = True
#
# Create and define queue parallel
#
create queue parallel
set queue parallel queue_type = Execution
set queue parallel resources_max.cput = 10000:00:00
set queue parallel resources_max.ncpus = 200
set queue parallel resources_max.nodect = 64
set queue parallel resources_max.nodes = 200:ppn=4
set queue parallel resources_max.walltime = 10000:00:00
set queue parallel resources_min.ncpus = 1
set queue parallel resources_min.nodect = 1
set queue parallel resources_default.ncpus = 1
set queue parallel resources_default.nodect = 1
set queue parallel resources_default.nodes = 1:ppn=1
set queue parallel resources_default.walltime = 10000:00:00
set queue parallel enabled = True
set queue parallel started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_host_enable = False
set server acl_user_enable = False
set server default_queue = serial
set server log_events = 127
set server mail_from = adm
set server query_other_jobs = True
set server resources_available.ncpus = 200
set server resources_available.nodect = 64
set server resources_available.nodes = 200
set server resources_default.neednodes = 1
set server resources_default.nodect = 1
set server resources_default.nodes = 1
set server resources_max.ncpus = 200
set server resources_max.nodes = 200
set server scheduler_iteration = 60
set server node_check_rate = 150
set server tcp_timeout = 6
set server default_node = 1
set server pbs_version = 2.0.0p8
####### QMGR -c "PRINT SERVER MASTER" END #######


Thanks 
  
Nilesh Mistry 
Academic Computing Services 
[EMAIL PROTECTED] & TEL Campus 
Seneca College of Applied Arts & Technology 
70 The Pond Road 
Toronto, Ontario 
M3J 3M6 Canada 
Phone 416 491 5050 ext 3788 
Fax 416 661 4695
http://acs.senecac.on.ca



Michael Edwards wrote:
> We'd need your script and the qsub command you used, possibly more
> configuration information from maui and torque, to be much help.
>
> I don't know that we have anyone who is deep with maui or torque right
> now, you might also want to ask on the maui or torque lists.
>
> From the other posts you have made, this error seems to be one of those
> general "Something is Broken" messages that could have many causes.
>
> On 9/17/07, Nilesh Mistry <[EMAIL PROTECTED]> wrote:
>   
>> Hello
>>
>> I am having problems submitting a job that requires 23 threads.  I keep
>> getting the following error:
>>
>> ERROR: Number of meshes not equal to number of thread
>>
>> Hardware:
>> 10 quad core nodes (therefore 40 processors available)
>>
>> What do I need to ensure in my job queue (qmgr), maui (maui.cfg) and
>> my submit script when using qsub?
>>
>> Any and all help is greatly appreciated.
>>
>> --
>> Thanks
>>
>> Nilesh Mistry
>> Academic Computing Services
>> [EMAIL PROTECTED] & TEL Campus
>> Seneca College of Applied Arts & Technology
>> 70 The Pond Road
>> Toronto, Ontario
>> M3J 3M6 Canada
>> Phone 416 491 5050 ext 3788
>> Fax 416 661 4695
>> http://acs.senecac.on.ca
>>
>
>   

