Dear Matt,
I cannot rewrite this example using the MatCreateFFT calls, because it is
not clear to me what the shape and ordering of the vectors
produced by VecScatterPetscToFFTW are. Furthermore, if I understand
correctly, this function gives access to the r2c / c2r Fourier transforms,
and not to the c2c transforms (unless PETSc is configured for complex scalars).
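For reference, the MatCreateFFT route I tried looks roughly like the
sketch below (pieced together from the man pages; using MatMultTranspose
for the backward transform is my reading of the examples there, and u
and ux are the same real vectors as in the attached code):

Mat      F;
Vec      x,y,z;
PetscInt dim[2] = {N0,N1};
ierr = MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&F);CHKERRQ(ierr);
ierr = MatCreateVecsFFTW(F,&x,&y,&z);CHKERRQ(ierr); /* x: input, y: forward output, z: backward output */
ierr = VecScatterPetscToFFTW(F,u,x);CHKERRQ(ierr);  /* PETSc ordering -> FFTW ordering */
ierr = MatMult(F,x,y);CHKERRQ(ierr);                /* forward transform */
/* ...multiply y by I*k here: this is exactly where the unclear ordering matters... */
ierr = MatMultTranspose(F,y,z);CHKERRQ(ierr);       /* backward transform */
ierr = VecScatterFFTWToPetsc(F,z,ux);CHKERRQ(ierr); /* FFTW ordering -> PETSc ordering */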
However, I have rewritten the example using FFTW instead of PFFT
(attached), and it runs fine both in serial and in parallel (with a CPU
count that should be a power of two). I would still prefer PFFT over
FFTW, so if somebody has comments on this library, I would be glad to
hear them.
Thank you,
Giuseppe
On 11/23/2015 03:09 PM, Matthew Knepley wrote:
On Mon, Nov 23, 2015 at 1:31 AM, Giuseppe Pitton <[email protected]> wrote:
Dear users and developers,
I am trying to interface PETSc with the parallel fast Fourier
transform library PFFT
(https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en),
based in turn on FFTW. My plan is to build a spectral
differentiation code, and in the attached files you can see a
simple example.
Is it possible to write this example using the existing calls?
http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatCreateFFT.html
That way we would have a working baseline that we could both run, and
then we could look at what is broken.
Thanks,
Matt
The code works correctly in serial, but in parallel there are some
problems with the output of the results. I think this is due to some
differences in the way PETSc and PFFT store data, but I am not sure
this is really the issue.
In the attached code, the number of processors used must be
specified at compile time in the variable "ncpus". As long as
ncpus = 1, everything works fine, but if ncpus = 2 or a higher
power of 2, the code terminates correctly but the results show
some artifacts, as you can see from the generated HDF5 file, named
"output-pfft.h5".
In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and
FFTWLIB should be set correctly.
Thank you,
Giuseppe
--
What most experimenters take for granted before they begin their
experiments is infinitely more interesting than any results to which
their experiments lead.
-- Norbert Wiener
PFFTINC = /pfft-1.0.8/include
FFTWINC = /fftw-3.3.4/include
PFFTLIB = /pfft-1.0.8/lib
FFTWLIB = /fftw-3.3.4/lib
CFLAGS =-std=c99 -I${PFFTINC} -I${FFTWINC} -L${PFFTLIB} -L${FFTWLIB}
CLEANFILES = output-pfft.h5 output-fftw.h5 test-pfft test-fftw
NP = 1
include ${PETSC_DIR}/lib/petsc/conf/variables
include ${PETSC_DIR}/lib/petsc/conf/rules
test-fftw: test-fftw.o chkopts
-${CLINKER} -o test-fftw test-fftw.o -lm -lfftw3 -lfftw3_mpi
${PETSC_LIB}
${RM} test-fftw.o
test-pfft: test-pfft.o chkopts
-${CLINKER} -o test-pfft test-pfft.o -lm -lpfft -lfftw3 -lfftw3_mpi
${PETSC_LIB}
${RM} test-pfft.o
include ${PETSC_DIR}/lib/petsc/conf/test
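# Example usage (a sketch; the MPI launcher name may differ on your
# system, and the rank count must divide N0):
#   make test-fftw
#   mpiexec -n 2 ./test-fftw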
static char help[] = "A test of PETSc-FFTW cooperation \n\n";
/*
* Remark: the number of cpus must be specified by setting
* the variable ncpus before compiling
*/
#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
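// note: including complex.h before the FFTW header makes fftw_complex
// an alias for the C99 "double complex" type, so expressions like
// I*uhat[k] and creal() below work on it directly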
#include <fftw3-mpi.h>
/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscInt,PetscScalar);
int main(int argc,char **args)
{
PetscErrorCode ierr;
PetscMPIInt rank,size;
PetscInt N0=1024,N1=N0,N=N0*N1,
ncpus = 2;
ptrdiff_t alloc_local,local_no,local_o_start;
Vec u,ux;
Vec u_out;
PetscViewer viewer;
PetscScalar Lx;
PetscScalar *ptr_u,*ptr_ux;
fftw_complex *uhat,*uhat_x,*uin,*uout;
fftw_plan fplan_u,bplan_x;
DM da;
ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
// the code assumes exactly ncpus ranks, with ncpus dividing N0 (see the remark above)
if (size != ncpus || N0 % size != 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"Run with ncpus MPI ranks; ncpus must divide N0");
Lx = 5.;
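// the DMDA uses a 1 x size process grid, so each rank owns full lines
// in x and a contiguous block of y rows; this is meant to match the
// slab decomposition that FFTW uses below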
ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,1,size,1,1,NULL,NULL,&da);CHKERRQ(ierr);
ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);
fftw_mpi_init();
alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_no,&local_o_start);
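// fftw_mpi_local_size_2d returns the local allocation size needed for
// the c2c transforms and this rank's slab: local_no rows of dimension 0,
// starting at global row local_o_start; since N0 = N1 here, FFTW's
// dimension 0 lines up with the DMDA y direction and dimension 1 with x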
PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
PetscPrintf(PETSC_COMM_SELF,"local_no: %d %d\n",(PetscInt)local_no);
PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d %d\n",(PetscInt)local_o_start);
// uhat is the Fourier transform of u
uhat = fftw_alloc_complex(alloc_local);
// Fourier transform of the x-derivative of u:
uhat_x = fftw_alloc_complex(alloc_local);
// complex work buffers: the c2c plans below expect fftw_complex arrays,
// so the real data is copied in and out of these
uin = fftw_alloc_complex(alloc_local);
uout = fftw_alloc_complex(alloc_local);
// allocate a real array for u (PetscMalloc1 takes an element count, not a byte count)
ierr = PetscMalloc1(N0/size*N1,&ptr_u);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,N0/size*N1,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
// allocate a real array for ux, the x-derivative of u
ierr = PetscMalloc1(N0/size*N1,&ptr_ux);CHKERRQ(ierr);
ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,N0/size*N1,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);
// u_out is used for output only
ierr = DMCreateGlobalVector(da,&u_out);CHKERRQ(ierr);
ierr = InitialConditions(da,u_out,N0,Lx);CHKERRQ(ierr);
ierr = VecCopy(u_out,u);CHKERRQ(ierr);
/* now compute the x derivative */
// fplan_u is the forward Fourier transform uin -> uhat
fplan_u = fftw_mpi_plan_dft_2d(N0,N1,uin,uhat,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
// bplan_x is the inverse Fourier transform uhat_x -> uout
bplan_x = fftw_mpi_plan_dft_2d(N0,N1,uhat_x,uout,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
// copy the real samples of u into the complex input buffer
for (PetscInt k=0;k<local_no*N1;k++)
uin[k] = ptr_u[k];
fftw_execute(fplan_u);
// renormalize uhat (FFTW transforms are unnormalized)
for (PetscInt j=0;j<local_no;j++)
for (PetscInt i=0;i<N1;i++)
uhat[j*N1+i] /= (double)(N);
// multiply uhat by I*k to obtain uhat_x; the wavenumber of column i is
// i for i < N1/2 and i-N1 otherwise, scaled by 1/Lx on the domain
// [0, 2*pi*Lx] (no extra factor of N0: uhat is already fully normalized)
for (PetscInt j=0;j<local_no;j++)
for (PetscInt i=0;i<N1/2;i++)
uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx;
for (PetscInt j=0;j<local_no;j++)
for (PetscInt i=N1/2;i<N1;i++)
uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx;
fftw_execute(bplan_x);
// copy the real part of the inverse transform into the array viewed by ux
for (PetscInt k=0;k<local_no*N1;k++)
ptr_ux[k] = creal(uout[k]);
#if defined(PETSC_HAVE_HDF5)
char sbuf[] = "output-fftw.h5";
PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
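// write Vecs with an extra base dimension in the HDF5 file (see the
// PetscViewerHDF5SetBaseDimension2 man page)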
PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
PetscViewerSetFromOptions(viewer);
VecCopy(u,u_out);
PetscObjectSetName((PetscObject) u_out, "u");
VecView(u_out, viewer);
VecCopy(ux,u_out);
PetscObjectSetName((PetscObject) u_out, "ux");
VecView(u_out, viewer);
PetscViewerDestroy(&viewer);
#endif
/* Free spaces */
ierr = VecDestroy(&u); CHKERRQ(ierr);
ierr = VecDestroy(&ux); CHKERRQ(ierr);
ierr = PetscFree(ptr_u); CHKERRQ(ierr);
ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
fftw_destroy_plan(fplan_u);
fftw_destroy_plan(bplan_x);
fftw_free(uhat);
fftw_free(uhat_x);
fftw_free(uin);
fftw_free(uout);
ierr = VecDestroy(&u_out);CHKERRQ(ierr);
ierr = DMDestroy(&da);CHKERRQ(ierr);
fftw_mpi_cleanup();
ierr = PetscFinalize();
return 0;
}
PetscErrorCode InitialConditions(DM da,Vec v,PetscInt N0,PetscScalar Lx)
{
PetscErrorCode ierr;
PetscInt N1 = N0;
PetscScalar **v_localptr,r,x0,x1,Ly = Lx,
h0 = 2.*PETSC_PI/(double)N0,
h1 = 2.*PETSC_PI/(double)N1;
PetscInt mstart,nstart,m,n;
ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
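// the initial condition is u(x0,x1) = -6/cosh^2(r) = -24/(exp(2r)+exp(-2r)+2),
// a smooth radial bump centered at the midpoint of the periodic box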
for (PetscInt j=nstart;j<nstart+n;j++)
for (PetscInt i=mstart;i<mstart+m;i++)
{
x0 = Lx*h0*(PetscScalar)i;
x1 = Ly*h1*(PetscScalar)j;
r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
}
ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);
return 0;
}