Dear Openmpi Users,

I am reserving several processors with SGE upon which I want to run a number of 
openmpi jobs, all of which individually (and combined) use 
less than the reserved number of processors. The code I am using uses 
BLACS, and when blacs_pinfo is called I get a seg fault. If the code doesn't 
call blacs_pinfo it runs fine being submitted in this manner. blacs_pinfo 
simply returns the number of available processors, so I suspect this is an 
issue with SGE and openmpi and the requested node number being different to 
that given to mpirun.


Can anyone explain why this would happen with openmpi jobs using BLACS on 
SGE, and perhaps suggest a way around it?


Many thanks

Conn


example submission script:
#!/bin/bash -f -l
#$ -V
#$ -N test
#$ -S /bin/bash
#$ -cwd
#$ -l vf=1800M
#$ -pe ib-ompi 12
#$ -q infiniband.q

# Stage the XPOL/YPOL/ZPOL inputs into the job's ${TMPDIR}, run one 4-process
# mpirun per directory (three at once, against the 12 slots requested above),
# then copy everything back under ${HOME}/4ZP.

    BIN=~/bin/program
    for i in XPOL YPOL ZPOL; do
       # -p creates the shared 4ZP parent on the first pass and does not
       # complain that it already exists on the following passes.
       mkdir -p ${TMPDIR}/4ZP/$i;
       cp ./4ZP/$i/* ${TMPDIR}/4ZP/$i;
    done

    cd ${TMPDIR}/4ZP/XPOL;
    mpirun -np 4 -machinefile ${TMPDIR}/machines $BIN >output &
    cd ${TMPDIR}/4ZP/YPOL;
    mpirun -np 4 -machinefile ${TMPDIR}/machines $BIN >output &
    cd ${TMPDIR}/4ZP/ZPOL;
    mpirun -np 4 -machinefile ${TMPDIR}/machines $BIN >output ;

    # Only the ZPOL run is in the foreground; block until the two
    # backgrounded runs have also finished before collecting their output.
    wait

    for i in XPOL YPOL ZPOL; do
     cp ${TMPDIR}/4ZP/$i/* ${HOME}/4ZP/$i;
    done


blacs_pinfo.c:
#include "Bdef.h"

/*
 * Cblacs_pinfo / blacs_pinfo_
 *
 * Reports the calling process's number and the total number of processes.
 *
 *   mypnum  (output)  set to BI_Iam.
 *   nprocs  (output)  set to BI_Np.  Note that *nprocs is also used as
 *                     scratch storage during first-call setup, so its value
 *                     on entry is not preserved.
 *
 * The first call is detected by BI_F77_MPI_COMM_WORLD being NULL.  On that
 * call the routine checks that MPI has been initialized, allocates the
 * storage used to hold the Fortran-side MPI handles, and queries the size
 * and rank of BI_MPI_COMM_WORLD into BI_Np and BI_Iam.  Later calls only
 * copy the cached values out.
 */
#if (INTFACE == C_CALL)
void Cblacs_pinfo(int *mypnum, int *nprocs)
#else
F_VOID_FUNC blacs_pinfo_(int *mypnum, int *nprocs)
#endif
{
   int ierr;
   extern int BI_Iam, BI_Np;

/*
 * If this is our first call, will need to set up some stuff
 */
   if (BI_F77_MPI_COMM_WORLD == NULL)
   {
/*
 *    The BLACS always call f77's mpi_init.  If the user is using C, he should
 *    explicitly call MPI_Init . . .
 *
 *    *nprocs is borrowed here to receive the "already initialized" flag.
 */
      MPI_Initialized(nprocs);
#ifdef MainInF77
      if (!(*nprocs)) bi_f77_init_();
#else
      if (!(*nprocs))
         BI_BlacsErr(-1, -1, __FILE__,
            "Users with C main programs must explicitly call MPI_Init");
#endif
      BI_F77_MPI_COMM_WORLD = (int *) malloc(sizeof(int));
/*
 *    Do not hand a NULL buffer to bi_f77_get_constants_: report the failure
 *    the same way the missing-MPI_Init case above is reported.
 *    NOTE(review): this relies on BI_BlacsErr not returning -- confirm.
 */
      if (BI_F77_MPI_COMM_WORLD == NULL)
         BI_BlacsErr(-1, -1, __FILE__,
            "Memory allocation failed for BI_F77_MPI_COMM_WORLD");
#ifdef UseF77Mpi
      BI_F77_MPI_CONSTANTS = (int *) malloc(23*sizeof(int));
      if (BI_F77_MPI_CONSTANTS == NULL)
         BI_BlacsErr(-1, -1, __FILE__,
            "Memory allocation failed for BI_F77_MPI_CONSTANTS");
      ierr = 1;
      bi_f77_get_constants_(BI_F77_MPI_COMM_WORLD, &ierr, BI_F77_MPI_CONSTANTS);
#else
      ierr = 0;
/*
 *    No constants table in this configuration; *nprocs is passed as the
 *    third argument and is overwritten with the real count below.
 */
      bi_f77_get_constants_(BI_F77_MPI_COMM_WORLD, &ierr, nprocs);
#endif
      BI_MPI_Comm_size(BI_MPI_COMM_WORLD, &BI_Np, ierr);
      BI_MPI_Comm_rank(BI_MPI_COMM_WORLD, &BI_Iam, ierr);
   }
   *mypnum = BI_Iam;
   *nprocs = BI_Np;
}

Reply via email to