Dear Matt,
I cannot rewrite this example using the MatCreateFFT calls because it is not clear to me which is the shape and the ordering of the vectors produced by MatScatterPetscToFFTW. Furthermore, if I understand correctly this function returns the c2r / r2c Fourier transforms, and not the c2c transforms (unless PETSc is configured for complex data). However, I have rewritten the example using FFTW instead of PFFT (attached), and it runs fine both in serial and in parallel (with a cpu count that should be a power of two). However, I prefer using PFFT over FFTW, so if somebody has comments on this library, it would be great to know.
Thank you,
Giuseppe




On 11/23/2015 03:09 PM, Matthew Knepley wrote:
On Mon, Nov 23, 2015 at 1:31 AM, Giuseppe Pitton <[email protected] <mailto:[email protected]>> wrote:

    Dear users and developers,
    I am trying to interface PETSc with the parallel fast Fourier
    transform library PFFT
    (https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en
    
<https://www-user.tu-chemnitz.de/%7Epotts/workgroup/pippig/software.php.en>),
    based in turn on FFTW. My plan is to build a spectral
    differentiation code, and in the attached files you can see a
    simple example.


Is it possible to write this example using the existing calls?

http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatCreateFFT.html

That way we would have a baseline we could both run that works, and then we could look
at something broken.

  Thanks,

     Matt

    The code works correctly in serial, but in parallel there are some
    problems regarding the output of the results, I think due to some
    differences in the way PETSc and PFFT store data, but I'm not sure
    if this is really the issue.
    In the attached code, the number of processors used should be
    specified at compile time in the variable "ncpus". As long as
    ncpus = 1, everything works fine, but if ncpus = 2 or an higher
    power of 2, the code terminates correctly but the results show
    some artifacts, as you can see from the generated hdf5 file, named
    "output-pfft.h5".
    In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and
    FFTWLIB should be set correctly.
    Thank you,
    Giuseppe




--
What most experimenters take for granted before they begin their experiments is infinitely more interesting than any results to which their experiments lead.
-- Norbert Wiener

PFFTINC          = /pfft-1.0.8/include
FFTWINC          = /fftw-3.3.4/include
PFFTLIB          = /pfft-1.0.8/lib
FFTWLIB          = /fftw-3.3.4/lib

CFLAGS           =-std=c99 -I${PFFTINC} -I${FFTWINC} -L${PFFTLIB} -L${FFTWLIB}
CLEANFILES       = output-pfft.h5 test-pfft
NP               = 1

include ${PETSC_DIR}lib/petsc/conf/variables
include ${PETSC_DIR}lib/petsc/conf/rules


test-fftw: test-fftw.o  chkopts
        -${CLINKER} -o test-fftw test-fftw.o -lm -lfftw3 -lfftw3_mpi 
${PETSC_LIB}
        ${RM} test-fftw.o
test-pfft: test-pfft.o  chkopts
        -${CLINKER} -o test-pfft test-pfft.o -lm -lpfft -lfftw3 -lfftw3_mpi 
${PETSC_LIB}
        ${RM} test-pfft.o

include ${PETSC_DIR}lib/petsc/conf/test
static char help[] = "A test of PETSc-PFFT cohoperation \n\n";

/*
 * Remark: the number of cpus must be specified by setting
 * the variable ncpus before compiling
*/

#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
#include <fftw3-mpi.h>

/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscInt,PetscScalar);


int main(int argc,char **args)
{
  PetscErrorCode        ierr;
  PetscMPIInt           rank,size;
  PetscInt              N0=pow(2,10),N1=N0,N=N0*N1,
                        ncpus = 2;
  const ptrdiff_t       n[]={N0,N1};
  ptrdiff_t             alloc_local,local_no,local_o_start;
  Vec                   u,ux;
  Vec                   u_out;
  PetscViewer           viewer;
  PetscScalar           Lx;
  PetscScalar           *ptr_u,*ptr_ux;
  fftw_complex          *uhat,*uhat_x;
  fftw_plan             fplan_u,bplan_x;
  DM                    da;
  MPI_Comm              comm_cart_1d;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);

  Lx = 5.;

  ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,1,size,1,1,NULL,NULL,&da);CHKERRQ(ierr);
  ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);


  fftw_mpi_init();
  alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_no,&local_o_start);

  PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
  PetscPrintf(PETSC_COMM_SELF,"local_no: %d %d\n",(PetscInt)local_no);
  PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d %d\n",(PetscInt)local_o_start);


  // uhat is the Fourier transform of u
  uhat    = fftw_alloc_complex(alloc_local);
  // Fourier transform of the x-derivative of u:
  uhat_x  = fftw_alloc_complex(alloc_local);


  // allocate a real array for u
  ierr = PetscMalloc1(N0/size*N1*sizeof(double),&ptr_u);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,N0/size*N1,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
  // allocate a real array for ux, the x-derivative of u
  ierr = PetscMalloc1(N0/size*N1*sizeof(double),&ptr_ux);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,N0/size*N1,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);

  // u_out is used for output only
  DMCreateGlobalVector(da,&u_out);

  ierr = InitialConditions(da,u_out,N0,Lx);CHKERRQ(ierr);
  ierr = VecCopy(u_out,u);CHKERRQ(ierr);




  /* now computing x derivative */
  // fplan_u is the Fourier transform of u -> uhat
  fplan_u = fftw_mpi_plan_dft_2d(N0,N1,ptr_u,uhat,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT);
  // fplan_u is the inverse Fourier transform of uhat_x -> ux
  bplan_x = fftw_mpi_plan_dft_2d(N0,N1,uhat_x,(fftw_complex*)ptr_ux,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
  //pfft_plan bplan_u = pfft_plan_dft(2,n,uhat,(fftw_complex*)ptr_ux,comm_cart_1d,PFFT_BACKWARD,PFFT_TRANSPOSED_NONE);


  fftw_execute(fplan_u);

  // renormalize uhat
  for (unsigned int j=0;j<local_no;j++)
  for (unsigned int i=0;i<N1;i++)
       uhat[j*N1+i] /= (double)(N);


  // multiply uhat by Ii to obtain uhat_x
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=0;i<N1/2;i++)
       uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=N1/2;i<N1;i++)
       uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
  
  fftw_execute(bplan_x);



#if defined(PETSC_HAVE_HDF5)
  char sbuf[] = "output-fftw.h5";
  PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
  PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
  PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
  PetscViewerSetFromOptions(viewer);
  VecCopy(u,u_out);
  PetscObjectSetName((PetscObject) u_out, "u");
  VecView(u_out, viewer);
  VecCopy(ux,u_out);
  PetscObjectSetName((PetscObject) u_out, "ux");
  VecView(u_out, viewer);
  PetscViewerDestroy(&viewer);
#endif



  /* Free spaces */
  ierr = VecDestroy(&u);   CHKERRQ(ierr);
  ierr = VecDestroy(&ux);  CHKERRQ(ierr);
  ierr = PetscFree(ptr_u); CHKERRQ(ierr);
  ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
  fftw_destroy_plan(fplan_u);
  fftw_destroy_plan(bplan_x);
  fftw_free(uhat);     
  fftw_free(uhat_x); 
  ierr = VecDestroy(&u_out);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);


  fftw_mpi_cleanup();
  ierr = PetscFinalize();
  return 0;
}



PetscErrorCode InitialConditions(DM da,Vec v,PetscInt N0,PetscScalar Lx)
{
  PetscErrorCode        ierr;
  PetscInt              N1 = N0;
  PetscScalar           **v_localptr,r,x0,x1,Ly = Lx,
                        h0 = 2.*PETSC_PI/(double)N0,
                        h1 = 2.*PETSC_PI/(double)N1;
  PetscInt              mstart,nstart,m,n;

  ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+n;j++)
  for (unsigned int i=mstart;i<mstart+m;i++)
  {
    x0 = Lx*h0*(PetscScalar)i;
    x1 = Ly*h1*(PetscScalar)j;
    r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
    v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
  }
  ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);

  return 0;
}

Reply via email to