Thanks Barry. Indeed, the problem is the different vector distribution between PETSc and FFTW/PFFT. This however still leaves open the issue of how to deal with an array coming from FFTW. In the attached code, the vectors u and uy are saved correctly in hdf5, but for some reason the vector ux is not (not in parallel at least). I cannot find the error, to me the three vectors look as they are written exactly in the same way.

Giuseppe



On 11/23/2015 11:03 PM, Barry Smith wrote:
    Your issues are likely due to a difference in how PETSc and pfft() think the 
"vectors" are laid out across processes

You seem to assume that DMDACreate2d() and pfft_create_procmesh() will make the 
same decisions about parallel layout; you print some information from the 
pfft_create()

   pfft_init();
   pfft_create_procmesh_1d(PETSC_COMM_WORLD,ncpus,&comm_cart_1d);
   alloc_local = 
pfft_local_size_dft(2,n,comm_cart_1d,PFFT_TRANSPOSED_NONE,local_no,local_o_start,local_ni,local_i_start);

   PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
   PetscPrintf(PETSC_COMM_SELF,"local_no: %d 
%d\n",(PetscInt)local_no[0],(PetscInt)local_no[1]);
   PetscPrintf(PETSC_COMM_SELF,"local_ni: %d 
%d\n",(PetscInt)local_ni[0],(PetscInt)local_ni[1]);
   PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d 
%d\n",(PetscInt)local_o_start[0],(PetscInt)local_o_start[1]);
   PetscPrintf(PETSC_COMM_SELF,"local_i_start: %d 
%d\n",(PetscInt)local_i_start[0],(PetscInt)local_i_start[1]);

but do not check anything about the layout DMDACreate2d() selected. First you 
need to call DMDAGetInfo() and DMDAGetLocalInfo() to see the layout PETSc is 
using and make sure it matches pfft


    Barry


On Nov 23, 2015, at 1:31 AM, Giuseppe Pitton <[email protected]> wrote:

Dear users and developers,
I am trying to interface PETSc with the parallel fast Fourier transform library 
PFFT (https://www-user.tu-chemnitz.de/~potts/workgroup/pippig/software.php.en), 
based in turn on FFTW. My plan is to build a spectral differentiation code, and 
in the attached files you can see a simple example.
The code works correctly in serial, but in parallel there are some problems 
regarding the output of the results, I think due to some differences in the way 
PETSc and PFFT store data, but I'm not sure if this is really the issue.
In the attached code, the number of processors used should be specified at compile time in the 
variable "ncpus". As long as ncpus = 1, everything works fine, but if ncpus = 2 or an 
higher power of 2, the code terminates correctly but the results show some artifacts, as you can 
see from the generated hdf5 file, named "output-pfft.h5".
In the makefile the variables PFFTINC, PFFTLIB, FFTWINC and FFTWLIB should be 
set correctly.
Thank you,
Giuseppe

<makefile.txt><test-pfft.c>

static char help[] = "A test of PETSc-FFTW cohoperation \n\n";


#include <complex.h>
#include <petscmat.h>
#include <petscdmda.h>
#include <petscviewerhdf5.h>
#include <fftw3-mpi.h>

/* user-defined functions */
PetscErrorCode InitialConditions(DM,Vec,PetscScalar*,PetscInt,PetscScalar);


int main(int argc,char **args)
{
  PetscErrorCode        ierr;
  PetscMPIInt           rank,size;
  PetscInt              N0=pow(2,10),N1=N0,N=N0*N1;
  ptrdiff_t             alloc_local,local_no,local_o_start;
  Vec                   u,ux,uy;
  Vec                   u_out;
  PetscViewer           viewer;
  PetscScalar           Lx = 5.;
  PetscScalar           *ptr_u,*ptr_ux,*ptr_uy;
  fftw_complex          *uhat,*uhat_x,*uhat_y;
  fftw_plan             fplan_u,bplan_x,bplan_y;
  DM                    da;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);


  ierr = DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_STAR,N0,N1,size,1,1,1,NULL,NULL,&da);CHKERRQ(ierr);
  ierr = DMDASetUniformCoordinates(da,0.,Lx*2.*PETSC_PI,0.,Lx*2.*PETSC_PI,0.,0.);CHKERRQ(ierr);


  fftw_mpi_init();
  alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_no,&local_o_start);

  PetscPrintf(PETSC_COMM_SELF,"alloc_local: %d\n",(PetscInt)alloc_local);
  PetscPrintf(PETSC_COMM_SELF,"local_no: %d\n",(PetscInt)local_no);
  PetscPrintf(PETSC_COMM_SELF,"local_o_start: %d\n",(PetscInt)local_o_start);


  // uhat is the Fourier transform of u
  uhat    = fftw_alloc_complex(alloc_local);
  // Fourier transform of the x-derivative of u:
  uhat_x  = fftw_alloc_complex(alloc_local);
  // Fourier transform of the y-derivative of u:
  uhat_y  = fftw_alloc_complex(alloc_local);


  // allocate a real array for u
  ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_u);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_u,&u);CHKERRQ(ierr);
  // allocate a real array for ux, the x-derivative of u
  ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_ux);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_ux,&ux);CHKERRQ(ierr);
  // allocate a real array for uy, the y-derivative of u
  ierr = PetscMalloc1(alloc_local*sizeof(double),&ptr_uy);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,alloc_local,N,(const PetscScalar*)ptr_uy,&uy);CHKERRQ(ierr);

  // u_out is used for output only
  DMCreateGlobalVector(da,&u_out);

  ierr = InitialConditions(da,u_out,ptr_u,N0,Lx);CHKERRQ(ierr);
  //ierr = VecCopy(u_out,u);CHKERRQ(ierr);




  /* now computing x derivative */
  // fplan_u is the Fourier transform of u -> uhat
  fplan_u = fftw_mpi_plan_dft_2d(N0,N1,ptr_u,uhat,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT);
  // fplan_u is the inverse Fourier transform of uhat_x -> ux
  bplan_x = fftw_mpi_plan_dft_2d(N0,N1,uhat_x,(fftw_complex*)ptr_ux,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
  // fplan_u is the inverse Fourier transform of uhat_y -> uy
  bplan_y = fftw_mpi_plan_dft_2d(N0,N1,uhat_y,(fftw_complex*)ptr_uy,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);


  fftw_execute(fplan_u);

  // renormalize uhat
  for (unsigned int j=0;j<local_no;j++)
  for (unsigned int i=0;i<N1;i++)
       uhat[j*N1+i] /= (double)(N);


  // multiply uhat by Ii to obtain uhat_x
  if (size == 1){
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=0;i<N1/2;i++)
       uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=N1/2;i<N1;i++)
       uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
  } else {
  if (rank < size/2)
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=0;i<N1/2;i++)
       uhat_x[j*N1+i] = I*(double)i*uhat[j*N1+i]/Lx/(double)N0;
  else
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=N1/2;i<N1;i++)
       uhat_x[j*N1+i] = -I*(double)(N1-i)*uhat[j*N1+i]/Lx/(double)N0;
  }

  // multiply uhat by Ij to obtain uhat_y
  if (size == 1){
  for (unsigned int j=0;j<local_no/2;j++)
    for (unsigned int i=0;i<N1;i++)
       uhat_y[j*N1+i] = I*(double)j*uhat[j*N1+i]/Lx/(double)N0;
  for (unsigned int j=local_no/2;j<local_no;j++)
    for (unsigned int i=0;i<N1;i++)
       uhat_y[j*N1+i] = -I*(double)(N1-j)*uhat[j*N1+i]/Lx/(double)N0; 
  } else {
  if (rank < size/2)
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=0;i<N1;i++)
       uhat_y[j*N1+i] = I*(double)(j+local_o_start)*uhat[j*N1+i]/Lx/(double)N0;
  else
  for (unsigned int j=0;j<local_no;j++)
    for (unsigned int i=0;i<N1;i++)
       uhat_y[j*N1+i] = -I*(double)(N1-(j+local_o_start))*uhat[j*N1+i]/Lx/(double)N0; 
  }

  fftw_execute(bplan_x);
  fftw_execute(bplan_y);



#if defined(PETSC_HAVE_HDF5)
  char sbuf[] = "output-fftw.h5";
  PetscPrintf(PETSC_COMM_WORLD,"writing vector in hdf5 to %s\n",sbuf);
  PetscViewerHDF5Open(PETSC_COMM_WORLD,sbuf,FILE_MODE_WRITE,&viewer);
  PetscViewerHDF5SetBaseDimension2(viewer,PETSC_TRUE);
  PetscViewerSetFromOptions(viewer);
  PetscScalar **v_localptr;
  PetscInt mstart,nstart,ml,nl;
  ierr = DMDAGetCorners(da,&mstart,&nstart,0,&ml,&nl,0);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+nl;j++)
  for (unsigned int i=mstart;i<mstart+ml;i++)
    v_localptr[j][i] = ptr_u[j*N1+i];
  ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  PetscObjectSetName((PetscObject) u_out, "u");
  VecView(u_out, viewer);
  PetscPrintf(PETSC_COMM_WORLD,"u written\n");

  ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+nl;j++)
  for (unsigned int i=mstart;i<mstart+ml;i++)
    v_localptr[j][i] = ptr_ux[j*N1+i];
  ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  PetscObjectSetName((PetscObject) u_out, "ux");
  VecView(u_out, viewer);
  PetscPrintf(PETSC_COMM_WORLD,"ux written\n");
  
  ierr = DMDAVecGetArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+nl;j++)
  for (unsigned int i=mstart;i<mstart+ml;i++)
    v_localptr[j][i] = ptr_uy[j*N1+i];
  ierr = DMDAVecRestoreArray(da,u_out,&v_localptr);CHKERRQ(ierr);
  PetscObjectSetName((PetscObject) u_out, "uy");
  VecView(u_out, viewer);
  PetscPrintf(PETSC_COMM_WORLD,"uy written\n");
  DMView(da,viewer);
  PetscViewerDestroy(&viewer);
#endif



  /* Free spaces */
  ierr = VecDestroy(&u);   CHKERRQ(ierr);
  ierr = VecDestroy(&ux);  CHKERRQ(ierr);
  ierr = VecDestroy(&uy);  CHKERRQ(ierr);
  ierr = PetscFree(ptr_u); CHKERRQ(ierr);
  ierr = PetscFree(ptr_ux);CHKERRQ(ierr);
  ierr = PetscFree(ptr_uy);CHKERRQ(ierr);
  fftw_destroy_plan(fplan_u);
  fftw_destroy_plan(bplan_x);
  fftw_destroy_plan(bplan_y);
  fftw_free(uhat);     
  fftw_free(uhat_x); 
  fftw_free(uhat_y); 
  ierr = VecDestroy(&u_out);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);


  fftw_mpi_cleanup();
  ierr = PetscFinalize();
  return 0;
}



PetscErrorCode InitialConditions(DM da,Vec v,PetscScalar* ptr_u,PetscInt N0,PetscScalar Lx)
{
  PetscErrorCode        ierr;
  PetscInt              N1 = N0;
  PetscScalar           **v_localptr,r,x0,x1,Ly = Lx,
                        h0 = 2.*PETSC_PI/(double)N0,
                        h1 = 2.*PETSC_PI/(double)N1;
  PetscInt              mstart,nstart,m,n;

  ierr = DMDAGetCorners(da,&mstart,&nstart,0,&m,&n,0);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,v,&v_localptr);CHKERRQ(ierr);
  for (unsigned int j=nstart;j<nstart+n;j++)
  for (unsigned int i=mstart;i<mstart+m;i++)
  {
    x0 = Lx*h0*(PetscScalar)i;
    x1 = Ly*h1*(PetscScalar)j;
    r = PetscSqrtScalar(PetscSqr(x0-Lx*PETSC_PI)+PetscSqr(x1-Ly*PETSC_PI));
    v_localptr[j][i] = -6.*4./(PetscExpScalar(2.*r)+PetscExpScalar(-2.*r)+2.);
    ptr_u[j*N0+i] = v_localptr[j][i];
  }
  ierr = DMDAVecRestoreArray(da,v,&v_localptr);CHKERRQ(ierr);

  return 0;
}

Reply via email to