Thanks Barry. Indeed, the problem is the different vector distribution
between PETSc and FFTW/PFFT.
This still leaves open the question of how to deal with an array coming from FFTW.
In the attached code, the vectors u and uy are saved correctly in HDF5, but for some reason the vector ux is not (at least not in parallel). I cannot find the error; as far as I can tell, the three vectors are written in exactly the same way.
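As a sanity check, I will also print the ownership ranges of ux and of the DMDA vector u_out to confirm they agree (a minimal sketch, using the variables from the attached code):

PetscInt lo,hi;
ierr = VecGetOwnershipRange(ux,&lo,&hi);CHKERRQ(ierr);
PetscPrintf(PETSC_COMM_SELF,"[%d] ux owns entries [%d,%d)\n",rank,lo,hi);
ierr = VecGetOwnershipRange(u_out,&lo,&hi);CHKERRQ(ierr);
PetscPrintf(PETSC_COMM_SELF,"[%d] u_out owns entries [%d,%d)\n",rank,lo,hi);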
Giuseppe
On 11/23/2015 11:03 PM, Barry Smith wrote:
Your issues are likely due to a difference in how PETSc and pfft() think the
"vectors" are laid out across processes.
You seem to assume that DMDACreate2d() and pfft_create_procmesh() will make the
same decisions about parallel layout. You print some information from the pfft
calls:
pfft_init();
pfft_create_procmesh_1d(PETSC_COMM_WORLD,ncpus,&comm_cart_1d);
alloc_local = pfft_local_size_dft(2,n,comm_cart_1d,PFFT_TRANSPOSED_NONE,local_no,local_o_start,local_ni,local_i_start);
PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
PetscPrintf(PETSC_COMM_SELF,"local_no: %d
%d\n",(PetscInt)local_no[0],(PetscInt)local_no[1]);
PetscPrintf(PETSC_COMM_SELF,"local_ni: %d
%d\n",(PetscInt)local_ni[0],(PetscInt)local_ni[1]);
PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d
%d\n",(PetscInt)local_o_start[0],(PetscInt)local_o_start[1]);
PetscPrintf(PETSC_COMM_SELF,"local_i_start: %d
%d\n",(PetscInt)local_i_start[0],(PetscInt)local_i_start[1]);
but you do not check anything about the layout DMDACreate2d() selected. First you
need to call DMDAGetInfo() and DMDAGetLocalInfo() to see the layout PETSc is
using and make sure it matches what pfft uses.
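For example, a minimal sketch of such a check (assuming the 1d process mesh from your snippet, and that pfft's first dimension corresponds to the DMDA's y index) could be:

DMDALocalInfo info;
ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr);
/* PETSc's local patch: x in [info.xs,info.xs+info.xm), y in [info.ys,info.ys+info.ym) */
PetscPrintf(PETSC_COMM_SELF,"DMDA: xs %d xm %d ys %d ym %d\n",info.xs,info.xm,info.ys,info.ym);
/* pfft slabs the first dimension, so the layouts agree only if the DMDA rows match the slab */
if (info.ys != (PetscInt)local_o_start[0] || info.ym != (PetscInt)local_no[0])
  PetscPrintf(PETSC_COMM_SELF,"layout mismatch on this rank\n");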
Barry
On Nov 23, 2015, at 1:31 AM, Giuseppe Pitton <[email protected]> wrote:
Dear users and developers,
I am trying to interface PETSc with the parallel fast Fourier transform library
PFFT (https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en),
which is in turn based on FFTW. My plan is to build a spectral differentiation
code, and in the attached files you can see a simple example.
The code works correctly in serial, but in parallel there are problems with the
output of the results. I think they are due to differences in the way PETSc and
PFFT store data, but I am not sure this is really the issue.
In the attached code, the number of processors must be specified at compile time in the
variable "ncpus". As long as ncpus = 1 everything works fine, but if ncpus = 2 or a
higher power of 2, the code terminates correctly yet the results show artifacts, as you can
see from the generated HDF5 file, "output-pfft.h5".
In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and FFTWLIB should be
set correctly.
Thank you,
Giuseppe
<makefile.txt><test-pfft.c>
static char help[] = "A test of PETSc-FFTW cooperation \n\n";
#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
#include <fftw3-mpi.h>
/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscScalar*,PetscInt,PetscScalar);
int main(int argc,char **args)
{
PetscErrorCode ierr;
PetscMPIInt rank,size;
PetscInt N0=1024,N1=N0,N=N0*N1; /* N0 = 2^10 */
ptrdiff_t alloc_local,local_no,local_o_start;
Vec u,ux,uy;
Vec u_out;
PetscViewer viewer;
PetscScalar Lx = 5.;
PetscScalar *ptr_u,*ptr_ux,*ptr_uy;
fftw_complex *uhat,*uhat_x,*uhat_y;
fftw_plan fplan_u,bplan_x,bplan_y;
DM da;
ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,size,1,1,1,NULL,NULL,&da);CHKERRQ(ierr);
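/* note: with m=size and n=1, PETSc splits this DMDA along the x (column)
   direction, while fftw_mpi below slabs the first (row) dimension; as the
   discussion above points out, the two layouts need to be checked against
   each other */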
ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);
fftw_mpi_init();
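/* fftw_mpi_local_size_2d returns (in units of fftw_complex) the minimum buffer
   size to allocate; this rank owns local_no rows of the first dimension,
   starting at global row local_o_start */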
alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_no,&local_o_start);
PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
PetscPrintf(PETSC_COMM_SELF,"local_no: %d\n",(PetscInt)local_no);
PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d\n",(PetscInt)local_o_start);
// uhat is the Fourier transform of u
uhat = fftw_alloc_complex(alloc_local);
// Fourier transform of the x-derivative of u:
uhat_x = fftw_alloc_complex(alloc_local);
// Fourier transform of the y-derivative of u:
uhat_y = fftw_alloc_complex(alloc_local);
// allocate a real array for u
ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_u);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
// allocate a real array for ux, the x-derivative of u
ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_ux);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);
// allocate a real array for uy, the y-derivative of u
ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_uy);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_uy,&uy);CHKERRQ(ierr);
// u_out is used for output only
DMCreateGlobalVector(da,&u_out);
ierr = InitialConditions(da,u_out,ptr_u,N0,Lx);CHKERRQ(ierr);
//ierr = VecCopy(u_out,u);CHKERRQ(ierr);
/* now computing x derivative */
// fplan_u is the Fourier transform of u -> uhat
fplan_u = fftw_mpi_plan_dft_2d(N0,N1,(fftw_complex*)ptr_u,uhat,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT);
// bplan_x is the inverse Fourier transform of uhat_x -> ux
bplan_x = fftw_mpi_plan_dft_2d(N0,N1,uhat_x,(fftw_complex*)ptr_ux,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
// bplan_y is the inverse Fourier transform of uhat_y -> uy
bplan_y = fftw_mpi_plan_dft_2d(N0,N1,uhat_y,(fftw_complex*)ptr_uy,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
fftw_execute(fplan_u);
// renormalize uhat
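// (FFTW transforms are unnormalized: a forward followed by a backward
// transform scales the data by N0*N1, so we divide by N here)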
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=0;i<N1;i++)
uhat[j*N1+i] /= (double)(N);
// multiply uhat by I*k_x (imaginary unit times the x wavenumber) to obtain uhat_x
if (size == 1){
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=0;i<N1/2;i++)
uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=N1/2;i<N1;i++)
uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
} else {
if (rank < size/2)
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=0;i<N1/2;i++)
uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
else
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=N1/2;i<N1;i++)
uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
}
// multiply uhat by I*k_y (imaginary unit times the y wavenumber) to obtain uhat_y
if (size == 1){
for (unsigned int j=0;j<local_no/2;j++)
for (unsigned int i=0;i<N1;i++)
uhat_y[j*N1+i] = I*(double)j*uhat[j*N1+i]/Lx/(double)N0;
for (unsigned int j=local_no/2;j<local_no;j++)
for (unsigned int i=0;i<N1;i++)
uhat_y[j*N1+i] = -I*(double)(N1-j)*uhat[j*N1+i]/Lx/(double)N0;
} else {
if (rank < size/2)
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=0;i<N1;i++)
uhat_y[j*N1+i] = I*(double)(j+local_o_start)*uhat[j*N1+i]/Lx/(double)N0;
else
for (unsigned int j=0;j<local_no;j++)
for (unsigned int i=0;i<N1;i++)
uhat_y[j*N1+i] = -I*(double)(N1-(j+local_o_start))*uhat[j*N1+i]/Lx/(double)N0;
}
fftw_execute(bplan_x);
fftw_execute(bplan_y);
#if defined(PETSC_HAVE_HDF5)
char sbuf[] = "output-fftw.h5";
PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
PetscViewerSetFromOptions(viewer);
PetscScalar **v_localptr;
PetscInt mstart,nstart,ml,nl;
ierr = DMDAGetCorners(da,&mstart,&nstart,0,&ml,&nl,0);CHKERRQ(ierr);
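/* copy the raw FFTW buffer into the DMDA vector; indexing ptr_u with global
   (i,j) offsets assumes the FFTW slab on this rank covers exactly the same
   rows as the DMDA patch (the layout assumption discussed above) */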
ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
for (unsigned int j=nstart;j<nstart+nl;j++)
for (unsigned int i=mstart;i<mstart+ml;i++)
v_localptr[j][i] = ptr_u[j*N1+i];
ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
PetscObjectSetName((PetscObject) u_out, "u");
VecView(u_out, viewer);
PetscPrintf(PETSC_COMM_WORLD,"u written\n");
ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
for (unsigned int j=nstart;j<nstart+nl;j++)
for (unsigned int i=mstart;i<mstart+ml;i++)
v_localptr[j][i] = ptr_ux[j*N1+i];
ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
PetscObjectSetName((PetscObject) u_out, "ux");
VecView(u_out, viewer);
PetscPrintf(PETSC_COMM_WORLD,"ux written\n");
ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
for (unsigned int j=nstart;j<nstart+nl;j++)
for (unsigned int i=mstart;i<mstart+ml;i++)
v_localptr[j][i] = ptr_uy[j*N1+i];
ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
PetscObjectSetName((PetscObject) u_out, "uy");
VecView(u_out, viewer);
PetscPrintf(PETSC_COMM_WORLD,"uy written\n");
DMView(da,viewer);
PetscViewerDestroy(&viewer);
#endif
/* Free spaces */
ierr = VecDestroy(&u); CHKERRQ(ierr);
ierr = VecDestroy(&ux); CHKERRQ(ierr);
ierr = VecDestroy(&uy); CHKERRQ(ierr);
ierr = PetscFree(ptr_u); CHKERRQ(ierr);
ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
ierr = PetscFree(ptr_uy);CHKERRQ(ierr);
fftw_destroy_plan(fplan_u);
fftw_destroy_plan(bplan_x);
fftw_destroy_plan(bplan_y);
fftw_free(uhat);
fftw_free(uhat_x);
fftw_free(uhat_y);
ierr = VecDestroy(&u_out);CHKERRQ(ierr);
ierr = DMDestroy(&da);CHKERRQ(ierr);
fftw_mpi_cleanup();
ierr = PetscFinalize();
return 0;
}
PetscErrorCode InitialConditions(DM da,Vec v,PetscScalar* ptr_u,PetscInt N0,PetscScalar Lx)
{
PetscErrorCode ierr;
PetscInt N1 = N0;
PetscScalar **v_localptr,r,x0,x1,Ly = Lx,
h0 = 2.*PETSC_PI/(double)N0,
h1 = 2.*PETSC_PI/(double)N1;
PetscInt mstart,nstart,m,n;
ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
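/* fill both the DMDA array and the raw FFTW buffer; writing ptr_u with global
   (i,j) indices assumes the two parallel layouts coincide on this rank */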
for (unsigned int j=nstart;j<nstart+n;j++)
for (unsigned int i=mstart;i<mstart+m;i++)
{
x0 = Lx*h0*(PetscScalar)i;
x1 = Ly*h1*(PetscScalar)j;
r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
ptr_u[j*N0+i] = v_localptr[j][i];
}
ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);
return 0;
}