Dear users and developers,
I am trying to interface PETSc with the parallel fast Fourier transform
library PFFT
(https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en),
based in turn on FFTW. My plan is to build a spectral differentiation
code, and in the attached files you can see a simple example.
The code works correctly in serial, but in parallel there are some
problems regarding the output of the results, I think due to some
differences in the way PETSc and PFFT store data, but I'm not sure if
this is really the issue.
In the attached code, the number of processors used should be specified
at compile time in the variable "ncpus". As long as ncpus = 1,
everything works fine, but if ncpus = 2 or an higher power of 2, the
code terminates correctly but the results show some artifacts, as you
can see from the generated hdf5 file, named "output-pfft.h5".
In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and FFTWLIB
should be set correctly.
Thank you,
Giuseppe
PFFTINC = /scratch/opt/pfft-dev/include
FFTWINC = /scratch/opt/fftw-3.3.4/include
PFFTLIB = /scratch/opt/pfft-dev/lib
FFTWLIB = /scratch/opt/fftw-3.3.4/lib
CFLAGS =-std=c99 -I${PFFTINC} -I${FFTWINC} -L${PFFTLIB} -L${FFTWLIB}
CLEANFILES = output-pfft.h5 test-pfft
NP = 1
include ${PETSC_DIR}lib/petsc/conf/variables
include ${PETSC_DIR}lib/petsc/conf/rules
test-pfft: test-pfft.o chkopts
-${CLINKER} -o test-pfft test-pfft.o -lm -lpfft -lfftw3 -lfftw3_mpi
${PETSC_LIB}
${RM} test-pfft.o
include ${PETSC_DIR}lib/petsc/conf/test
static char help[] = "A test of PETSc-PFFT cohoperation \n\n";
/*
* Remark: the number of cpus must be specified by setting
* the variable ncpus before compiling
*/
#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
#include <pfft.h>
/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscInt,PetscScalar);
int main(int argc,char **args)
{
PetscErrorCode ierr;
PetscMPIInt rank,size;
PetscInt N0=pow(2,10),N1=N0,N=N0*N1,
ncpus = 2;
const ptrdiff_t n[]={N0,N1};
ptrdiff_t alloc_local,local_no[2],
local_o_start[2],local_ni[2],
local_i_start[2];
Vec u,ux;
Vec u_out;
PetscViewer viewer;
PetscScalar Lx;
PetscScalar *ptr_u,*ptr_ux;
pfft_complex *uhat,*uhat_x;
pfft_plan fplan_u,bplan_x;
DM da;
MPI_Comm comm_cart_1d;
ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
Lx = 5.;
ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,1,size,1,1,NULL,NULL,&da);CHKERRQ(ierr);
ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);
pfft_init();
pfft_create_procmesh_1d(PETSC_COMM_WORLD,ncpus,&comm_cart_1d);
alloc_local = pfft_local_size_dft(2,n,comm_cart_1d,PFFT_TRANSPOSED_NONE,local_no,local_o_start,local_ni,local_i_start);
PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
PetscPrintf(PETSC_COMM_SELF,"local_no: %d %d\n",(PetscInt)local_no[0],(PetscInt)local_no[1]);
PetscPrintf(PETSC_COMM_SELF,"local_ni: %d %d\n",(PetscInt)local_ni[0],(PetscInt)local_ni[1]);
PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d %d\n",(PetscInt)local_o_start[0],(PetscInt)local_o_start[1]);
PetscPrintf(PETSC_COMM_SELF,"local_i_start: %d %d\n",(PetscInt)local_i_start[0],(PetscInt)local_i_start[1]);
// uhat is the Fourier transform of u
uhat = pfft_alloc_complex(alloc_local);
// Fourier transform of the x-derivative of u:
uhat_x = pfft_alloc_complex(alloc_local);
// allocate a real array for u
ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_u);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
// allocate a real array for ux, the x-derivative of u
ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_ux);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);
// u_out is used for output only
DMCreateGlobalVector(da,&u_out);
ierr = InitialConditions(da,u_out,N0,Lx);CHKERRQ(ierr);
ierr = VecCopy(u_out,u);CHKERRQ(ierr);
/* now computing x derivative */
// fplan_u is the Fourier transform of u -> uhat
fplan_u = pfft_plan_dft(2,n,ptr_u,uhat,comm_cart_1d,PFFT_FORWARD,PFFT_TRANSPOSED_NONE|PFFT_PRESERVE_INPUT);
// fplan_u is the inverse Fourier transform of uhat_x -> ux
bplan_x = pfft_plan_dft(2,n,uhat_x,ptr_ux,comm_cart_1d,PFFT_BACKWARD,PFFT_TRANSPOSED_NONE);
pfft_execute(fplan_u);
// renormalize uhat
for (unsigned int j=0;j<local_no[0];j++)
for (unsigned int i=0;i<N1;i++)
uhat[j*N1+i] /= (double)(N);
// multiply uhat by Ii to obtain uhat_x
for (unsigned int j=0;j<local_no[0];j++)
for (unsigned int i=0;i<N1/2;i++)
uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
for (unsigned int j=0;j<local_no[0];j++)
for (unsigned int i=N1/2;i<N1;i++)
uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
pfft_execute(bplan_x);
#if defined(PETSC_HAVE_HDF5)
char sbuf[] = "output-pfft.h5";
PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
PetscViewerSetFromOptions(viewer);
VecCopy(u,u_out);
PetscObjectSetName((PetscObject) u_out, "u");
VecView(u_out, viewer);
VecCopy(ux,u_out);
PetscObjectSetName((PetscObject) u_out, "ux");
VecView(u_out, viewer);
PetscViewerDestroy(&viewer);
#endif
/* Free spaces */
ierr = VecDestroy(&u); CHKERRQ(ierr);
ierr = VecDestroy(&ux); CHKERRQ(ierr);
ierr = PetscFree(ptr_u); CHKERRQ(ierr);
ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
pfft_destroy_plan(fplan_u);
pfft_destroy_plan(bplan_x);
pfft_free(uhat);
pfft_free(uhat_x);
ierr = VecDestroy(&u_out);CHKERRQ(ierr);
ierr = DMDestroy(&da);CHKERRQ(ierr);
MPI_Comm_free(&comm_cart_1d);
ierr = PetscFinalize();
return 0;
}
PetscErrorCode InitialConditions(DM da,Vec v,PetscInt N0,PetscScalar Lx)
{
PetscErrorCode ierr;
PetscInt N1 = N0;
PetscScalar **v_localptr,r,x0,x1,Ly = Lx,
h0 = 2.*PETSC_PI/(double)N0,
h1 = 2.*PETSC_PI/(double)N1;
PetscInt mstart,nstart,m,n;
ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
for (unsigned int j=nstart;j<nstart+n;j++)
for (unsigned int i=mstart;i<mstart+m;i++)
{
x0 = Lx*h0*(PetscScalar)i;
x1 = Ly*h1*(PetscScalar)j;
r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
}
ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);
return 0;
}