Dear users and developers,
I am trying to interface PETSc with the parallel fast Fourier transform library PFFT (https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en), based in turn on FFTW. My plan is to build a spectral differentiation code, and in the attached files you can see a simple example. The code works correctly in serial, but in parallel there are some problems regarding the output of the results, I think due to some differences in the way PETSc and PFFT store data, but I'm not sure if this is really the issue. In the attached code, the number of processors used should be specified at compile time in the variable "ncpus". As long as ncpus = 1, everything works fine, but if ncpus = 2 or an higher power of 2, the code terminates correctly but the results show some artifacts, as you can see from the generated hdf5 file, named "output-pfft.h5". In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and FFTWLIB should be set correctly.
Thank you,
Giuseppe

PFFTINC          = /scratch/opt/pfft-dev/include
FFTWINC          = /scratch/opt/fftw-3.3.4/include
PFFTLIB          = /scratch/opt/pfft-dev/lib
FFTWLIB          = /scratch/opt/fftw-3.3.4/lib

CFLAGS           =-std=c99 -I${PFFTINC} -I${FFTWINC} -L${PFFTLIB} -L${FFTWLIB}
CLEANFILES       = output-pfft.h5 test-pfft
NP               = 1

include ${PETSC_DIR}lib/petsc/conf/variables
include ${PETSC_DIR}lib/petsc/conf/rules


test-pfft: test-pfft.o  chkopts
        -${CLINKER} -o test-pfft test-pfft.o -lm -lpfft -lfftw3 -lfftw3_mpi 
${PETSC_LIB}
        ${RM} test-pfft.o

include ${PETSC_DIR}lib/petsc/conf/test
static char help[] = "A test of PETSc-PFFT cohoperation \n\n";

/*
 * Remark: the number of cpus must be specified by setting
 * the variable ncpus before compiling
*/

#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
#include <pfft.h>

/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscInt,PetscScalar);


int main(int argc,char **args)
{
  PetscErrorCode        ierr;
  PetscMPIInt           rank,size;
  PetscInt              N0=pow(2,10),N1=N0,N=N0*N1,
                        ncpus = 2;
  const ptrdiff_t       n[]={N0,N1};
  ptrdiff_t             alloc_local,local_no[2],
                        local_o_start[2],local_ni[2],
                        local_i_start[2];
  Vec                   u,ux;
  Vec                   u_out;
  PetscViewer           viewer;
  PetscScalar           Lx;
  PetscScalar           *ptr_u,*ptr_ux;
  pfft_complex          *uhat,*uhat_x;
  pfft_plan             fplan_u,bplan_x;
  DM                    da;
  MPI_Comm              comm_cart_1d;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);

  Lx = 5.;

  ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,1,size,1,1,NULL,NULL,&da);CHKERRQ(ierr);
  ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);


  pfft_init();
  pfft_create_procmesh_1d(PETSC_COMM_WORLD,ncpus,&comm_cart_1d);
  alloc_local = pfft_local_size_dft(2,n,comm_cart_1d,PFFT_TRANSPOSED_NONE,local_no,local_o_start,local_ni,local_i_start);

  PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
  PetscPrintf(PETSC_COMM_SELF,"local_no: %d %d\n",(PetscInt)local_no[0],(PetscInt)local_no[1]);
  PetscPrintf(PETSC_COMM_SELF,"local_ni: %d %d\n",(PetscInt)local_ni[0],(PetscInt)local_ni[1]);
  PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d %d\n",(PetscInt)local_o_start[0],(PetscInt)local_o_start[1]);
  PetscPrintf(PETSC_COMM_SELF,"local_i_start: %d %d\n",(PetscInt)local_i_start[0],(PetscInt)local_i_start[1]);


  // uhat is the Fourier transform of u
  uhat    = pfft_alloc_complex(alloc_local);
  // Fourier transform of the x-derivative of u:
  uhat_x  = pfft_alloc_complex(alloc_local);


  // allocate a real array for u
  ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_u);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
  // allocate a real array for ux, the x-derivative of u
  ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_ux);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);

  // u_out is used for output only
  DMCreateGlobalVector(da,&u_out);

  ierr = InitialConditions(da,u_out,N0,Lx);CHKERRQ(ierr);
  ierr = VecCopy(u_out,u);CHKERRQ(ierr);




  /* now computing x derivative */
  // fplan_u is the Fourier transform of u -> uhat
  fplan_u = pfft_plan_dft(2,n,ptr_u,uhat,comm_cart_1d,PFFT_FORWARD,PFFT_TRANSPOSED_NONE|PFFT_PRESERVE_INPUT);
  // fplan_u is the inverse Fourier transform of uhat_x -> ux
  bplan_x = pfft_plan_dft(2,n,uhat_x,ptr_ux,comm_cart_1d,PFFT_BACKWARD,PFFT_TRANSPOSED_NONE);


  pfft_execute(fplan_u);

  // renormalize uhat
  for (unsigned int j=0;j<local_no[0];j++)
  for (unsigned int i=0;i<N1;i++)
       uhat[j*N1+i] /= (double)(N);


  // multiply uhat by Ii to obtain uhat_x
  for (unsigned int j=0;j<local_no[0];j++)
    for (unsigned int i=0;i<N1/2;i++)
       uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
  for (unsigned int j=0;j<local_no[0];j++)
    for (unsigned int i=N1/2;i<N1;i++)
       uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
  
  pfft_execute(bplan_x);



#if defined(PETSC_HAVE_HDF5)
  char sbuf[] = "output-pfft.h5";
  PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
  PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
  PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
  PetscViewerSetFromOptions(viewer);
  VecCopy(u,u_out);
  PetscObjectSetName((PetscObject) u_out, "u");
  VecView(u_out, viewer);
  VecCopy(ux,u_out);
  PetscObjectSetName((PetscObject) u_out, "ux");
  VecView(u_out, viewer);
  PetscViewerDestroy(&viewer);
#endif



  /* Free spaces */
  ierr = VecDestroy(&u);   CHKERRQ(ierr);
  ierr = VecDestroy(&ux);  CHKERRQ(ierr);
  ierr = PetscFree(ptr_u); CHKERRQ(ierr);
  ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
  pfft_destroy_plan(fplan_u);
  pfft_destroy_plan(bplan_x);
  pfft_free(uhat);     
  pfft_free(uhat_x); 
  ierr = VecDestroy(&u_out);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);
  MPI_Comm_free(&comm_cart_1d);


  ierr = PetscFinalize();
  return 0;
}



PetscErrorCode InitialConditions(DM da,Vec v,PetscInt N0,PetscScalar Lx)
{
  PetscErrorCode        ierr;
  PetscInt              N1 = N0;
  PetscScalar           **v_localptr,r,x0,x1,Ly = Lx,
                        h0 = 2.*PETSC_PI/(double)N0,
                        h1 = 2.*PETSC_PI/(double)N1;
  PetscInt              mstart,nstart,m,n;

  ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+n;j++)
  for (unsigned int i=mstart;i<mstart+m;i++)
  {
    x0 = Lx*h0*(PetscScalar)i;
    x1 = Ly*h1*(PetscScalar)j;
    r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
    v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
  }
  ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);

  return 0;
}

Reply via email to