Hi, when running MTT on the Cray XT3/XT4 machines, I found that MTT does not contain any support for ALPS. As a result, it always executes mpirun with "-np 1". I patched lib/MTT/Values/Functions.pm with the following to overcome this:
-----Original Message----- From: Matney Sr, Kenneth D. Sent: Wednesday, August 13, 2008 5:57 PM To: Shipman, Galen M. Cc: Graham, Richard L. Subject: FW: ALPS modifications for MTT --- Functions-bak.pm 2008-08-06 14:31:26.256538000 -0400 +++ Functions.pm 2008-08-13 17:43:40.273641000 -0400 @@ -602,6 +602,8 @@ # Resource managers return "SLURM" if slurm_job(); + return "ALPS" + if alps_job(); return "TM" if pbs_job(); return "N1GE" @@ -638,6 +640,8 @@ # Resource managers return slurm_max_procs() if slurm_job(); + return alps_max_procs() + if alps_job(); return pbs_max_procs() if pbs_job(); return n1ge_max_procs() @@ -670,6 +674,8 @@ # Resource managers return slurm_hosts() if slurm_job(); + return alps_hosts() + if alps_job(); return pbs_hosts() if pbs_job(); return n1ge_hosts() @@ -1004,6 +1010,70 @@ #----------------------------------------------------------------------- --- +# Return "1" if we're running in an ALPS job; "0" otherwise. +sub alps_job { + Debug("&alps_job\n"); + +# It is true that ALPS can be run in an interactive access mode; however, +# this would not be a true managed environment. Such only can be +# achieved under a batch scheduler. + return ((exists($ENV{BATCH_PARTITION_ID}) && + exists($ENV{PBS_NNODES})) ? "1" : "0"); +} + +#---------------------------------------------------------------------- ---- + +# If in an ALPS job, return the max number of processes we can run. +# Otherwise, return 0. +sub alps_max_procs { + Debug("&alps_max_procs\n"); + + return "0" + if (!alps_job()); + +# If we were not running under PBS or some other batch system, we would +# not have the foggiest idea of how many processes mpirun could spawn. + my $ret; + $ret=$ENV{PBS_NNODES}; + + Debug("&alps_max_procs returning: $ret\n"); + return "$ret"; +} + +#---------------------------------------------------------------------- ---- + +# If in an ALPS job, return the hosts we can run on. Otherwise, return +# "". 
+sub alps_hosts { + Debug("&alps_hosts\n"); + + return "" + if (!alps_job()); + +# Again, we need a batch system to achieve management; return the uniq'ed +# contents of $PBS_HOSTFILE. Actually, on the Cray XT, we can return the +# NIDS allocated by ALPS; but, without launching servers to other service +# nodes, all communication is via the launching node and NIDS actually +# have no persistent resource allocated to the user. That is, all file +# resources accessible from a NID are shared with the launching node. +# And, since ALPS is managed by the batch system, only the launching node +# can initiate communication with a NID. In effect, the Cray XT model is +# of a single service node with a varying number of compute processors. + open (FILE, $ENV{PBS_NODEFILE}) || return ""; + my $lines; + while (<FILE>) { + chomp; + $lines->{$_} = 1; + } + + my @hosts = sort(keys(%$lines)); + my $hosts = join(",", @hosts); + Debug("&alps_hosts returning: $hosts\n"); + return "$hosts"; +} + +#---------------------------------------------------------------------- ---- + # Return "1" if we're running in a PBS job; "0" otherwise. sub pbs_job { Debug("&pbs_job\n"); -- Ken