Hi Reuti,

thanks for the quick reply.

I have added -catch_rsh to the PE. Now, when I start a simulation with
'mpiexec -np $NSLOTS ...', the lsdyna.out file shows 'Error: Unknown
option -np'. When I use 'mpirun -np $NSLOTS ...' instead, lsdyna.err shows
'mpirun: rsh: Command not found'.
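
Just to make sure I am reading your suggestion correctly, the sketch below is
what I would try next. I am assuming that startpmpi.sh, like the stock
startmpi.sh, writes the machine file to $TMPDIR/machines, and that Platform
MPI's mpirun accepts the same -machinefile form as mpiexec. I also notice
that test.sh currently overwrites PATH with only the Platform MPI bin
directory, which might be why rsh is not found, so the sketch keeps the
existing PATH:

export MPI_ROOT=/export/apps/platform_mpi
export PATH=$MPI_ROOT/bin:$PATH                    # keep the system PATH so rsh can still be found
export LD_LIBRARY_PATH=$MPI_ROOT/lib/linux_amd64
export MPI_REMSH=rsh                               # as you suggested
export MPI_TMPDIR=$TMPDIR
mpirun -np $NSLOTS -machinefile $TMPDIR/machines $BIN $ARGS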

Petar

[petar@rocks test]$ cat lsdyna.err
mpirun: rsh: Command not found

[petar@rocks test]$ cat lsdyna.out
-catch_rsh
/opt/gridengine/default/spool/mnode01/active_jobs/32738.1/pe_hostfile
mnode01
mnode01
mnode01
mnode01
mnode01
mnode01
mnode01
mnode01
mnode02
mnode02
mnode02
mnode02
mnode02
mnode02
mnode02
mnode02
Error: Unknown option -np

[root@rocks test]# qconf -sp pmpi
pe_name            pmpi
slots              9999
user_lists         NONE
xuser_lists        NONE
start_proc_args    /opt/gridengine/mpi/pmpi/startpmpi.sh -catch_rsh
$pe_hostfile
stop_proc_args     /opt/gridengine/mpi/pmpi/stoppmpi.sh
allocation_rule    $fill_up
control_slaves     FALSE
job_is_first_task  TRUE
urgency_slots      min
accounting_summary TRUE
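
Regarding the PE itself: for the tight integration you mentioned, do I also
need control_slaves TRUE (and job_is_first_task FALSE)? This is only my guess
at the relevant lines (please correct me if these values are wrong); the rest
I would leave as above:

start_proc_args    /opt/gridengine/mpi/pmpi/startpmpi.sh -catch_rsh $pe_hostfile
stop_proc_args     /opt/gridengine/mpi/pmpi/stoppmpi.sh
control_slaves     TRUE
job_is_first_task  FALSE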



On 03/07/2014 12:49 PM, Reuti wrote:
> Hi,
>
> Am 07.03.2014 um 12:28 schrieb Petar Penchev:
>
>> I have a Rocks 6.1 cluster using OGS 2011.11p1, and I am trying to use the
>> Platform MPI parallel libraries. My problem is that when I submit a job
>> using qsub test.sh, the job starts on only one node with 16 processes
>> instead of across both nodes. The PE pmpi, which I am using for now, is only a
>> copy of mpi.
> Does the definition of the PE pmpi also include -catch_rsh? Recent 
> IBM/Platform-MPI releases can cope with a machine file in the MPICH(1) format, which 
> is created by /usr/sge/mpi/startmpi.sh.
>
> In addition you need the following settings for a tight integration. Please 
> try:
>
> ...
> export MPI_REMSH=rsh
> export MPI_TMPDIR=$TMPDIR
> mpiexec -np $NSLOTS -machinefile $TMPDIR/machines $BIN $ARGS
>
> -- Reuti
>
>
>> What am I missing? Does anyone have a working -pe submit script, or some
>> hints on how to make this work?
>>
>> Thanks in advance,
>> Petar
>>
>> [root@rocks mpi]# test.sh
>> #!/bin/bash
>> #$ -N lsdyna
>> #$ -S /bin/bash
>> #$ -pe pmpi 16
>> #$ -cwd
>> #$ -o lsdyna.out
>> #$ -e lsdyna.err
>> ###
>> #$ -q test.q
>> ### -notify
>> export MPI_ROOT=/export/apps/platform_mpi
>> export LD_LIBRARY_PATH=/export/apps/platform_mpi/lib/linux_amd64
>> export PATH=/export/apps/platform_mpi/bin
>> BIN="/export/apps/lsdyna/ls-dyna_mpp_s_r6_1_2_85274_x64_redhat54_ifort120_sse2_platformmpi.exe"
>> ARGS="i=test.k"
>> mpirun -np $NSLOTS $BIN $ARGS
>>
>>
>> [root@rocks mpi]# qconf -sq test.q
>> qname                 test.q
>> hostlist              mnode01 mnode02
>> seq_no                0
>> load_thresholds       np_load_avg=1.75
>> suspend_thresholds    NONE
>> nsuspend              1
>> suspend_interval      00:05:00
>> priority              0
>> min_cpu_interval      00:05:00
>> processors            UNDEFINED
>> qtype                 BATCH INTERACTIVE
>> ckpt_list             NONE
>> pe_list               pmpi
>> rerun                 FALSE
>> slots                 8
>> tmpdir                /tmp
>> shell                 /bin/bash
>> prolog                NONE
>> epilog                NONE
>> shell_start_mode      unix_behavior
>> starter_method        NONE
>> suspend_method        NONE
>> resume_method         NONE
>> terminate_method      NONE
>> notify                00:00:60
>> owner_list            NONE
>> user_lists            NONE
>> xuser_lists           NONE
>> subordinate_list      NONE
>> complex_values        NONE
>> projects              NONE
>> xprojects             NONE
>> calendar              NONE
>> initial_state         default
>> s_rt                  INFINITY
>> h_rt                  INFINITY
>> s_cpu                 INFINITY
>> h_cpu                 INFINITY
>> s_fsize               INFINITY
>> h_fsize               INFINITY
>> s_data                INFINITY
>> h_data                INFINITY
>> s_stack               INFINITY
>> h_stack               INFINITY
>> s_core                INFINITY
>> h_core                INFINITY
>> s_rss                 INFINITY
>> h_rss                 INFINITY
>> s_vmem                INFINITY
>> h_vmem                INFINITY

_______________________________________________
users mailing list
[email protected]
https://gridengine.org/mailman/listinfo/users
