Dear petsc-devs

Trying to write a test case for MatZeroRowColumns (as requested by Matt) in order to extend it to MPIBAIJ, I came across a bug in the current implementation for MPIAIJ. The attached test code assembles a simple 2D Laplacian (with the standard 4,-1,-1,-1 stencil) on the following grid:

0 1 2  3
4 5 6  7
8 9 10 11

with boundary conditions applied to nodes 0,1,2,3,4 and 8 with values (stored in Vec x) 0., 1., 2., 3., 4. and 8. For simplicity I've kept a zero rhs (before the call to MatZeroRowsColumns) After the call to MatZeroRowColumns I therefore expect the following rhs:

0. 1. 2. 3. 4. 5. 2. 3. 8. 8. 0. 0.

The entries at 0,1,2,3,4 and 8 correspond to the boundary values (multiplied by a diagonal value of 1.0) and the values at 5,6,7 and 9 to the the lifted values: 1.+4.=5.,2.,3. and 8. However, if you run the attached example that's not what you get, the lifted values at 6 and 7 are wrong. This works fine in serial (you have to change nlocal=4 to get the same grid).

I think this is caused by the way that entries in x are communicated with other processes that have off-diagonal entries referencing those. In line 944 of mpiaij.c it uses a VecScatter with ADD_VALUES to do this, but it uses the local work vector l->lvec without zeroing it first. This means it will still have values in it of whatever it was used for before. In this case that would be the MatMult of line 125 of test.c (in fact you can "fix" the example by adding the line 136-139 where it does a spurious MatMult with a zero vector).

The attached code could be easily extended to test block matrices (simply increasing bs). The commented out lines in the assembly were basically to make the problem a bit more challenging (have an asymmetric stencil and the setting of off-process boundary nodes). The expected results for MPIBAIJ can be obtained by comparing with the MPIAIJ results (once that works correctly).

Cheers
Stephan
static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.";

#include <petscmat.h>

#undef __FUNCT__
#define __FUNCT__ "main"
int main(int argc,char **args)
{
  Mat            A;
  Vec            x, rhs, y, x2, y2;
  PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt    rank,size;
  PetscErrorCode ierr;
  PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values;

  PetscInitialize(&argc,&args,(char*)0,help);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  n = nlocal*size;

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSetUp(A);CHKERRQ(ierr);

  ierr = VecCreate(PETSC_COMM_WORLD, &rhs);CHKERRQ(ierr);
  ierr = VecSetSizes(rhs, PETSC_DECIDE, m*n*bs);CHKERRQ(ierr);
  ierr = VecSetFromOptions(rhs);CHKERRQ(ierr);
  ierr = VecSetUp(rhs);CHKERRQ(ierr);

  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v = -1.0;  Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        if (i<m-1) {J = Ii + n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        if (j>0)   {J = Ii - 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        if (j<n-1) {J = Ii + 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
        /* now add a 2nd order upwind advection term to add a little asymmetry */
        //if (j>2) {
        //  J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
        //  ierr = MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);CHKERRQ(ierr);
        //} else {
        //  /* fall back to 1st order upwind */
        //  v1 = -1.0*a; v0 = 1.0*a;
        //};
        //if (j>1) {J = Ii-1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);CHKERRQ(ierr);}
        //ierr = MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);CHKERRQ(ierr);
        //a /= 10.; /* use a different velocity for the next component */
        ///* add a coupling to the previous and next components */
        //v = 0.5;
        //if (b>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        //if (b<bs-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
        /* make up some rhs */
        ierr = VecSetValue(rhs, Ii, rhsval, INSERT_VALUES); CHKERRQ(ierr);
        rhsval += 1.0;
      }
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(rhs);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(rhs);CHKERRQ(ierr);
  // set rhs to zero to simplify:
  ierr = VecZeroEntries(rhs);

  /*version where boundary conditions are set by processes that don't necessarily own the nodes */
  //if (rank==0) {
  //  nboundary_nodes = size>m ? nlocal : m-size+nlocal;
  //  ierr = PetscMalloc(nboundary_nodes*sizeof(PetscInt),&boundary_nodes);CHKERRQ(ierr);
  //  k = 0;
  //  for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;};
  //} else {
  //  nboundary_nodes = nlocal+1;
  //  ierr = PetscMalloc(nboundary_nodes*sizeof(PetscInt),&boundary_nodes);CHKERRQ(ierr);
  //  boundary_nodes[0] = rank*n;
  //  k = 1;
  //};
  //for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;};

  /*version where boundary conditions are set by the node owners only */
  ierr = PetscMalloc(m*n*sizeof(PetscInt),&boundary_nodes);CHKERRQ(ierr);
  k=0;
  for (j=0; j<n; j++) {
    Ii = j;
    if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) {
      boundary_nodes[k++] = Ii;
    }
  }
  for (i=1; i<m; i++) {
    Ii = n*i;
    if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) {
      boundary_nodes[k++] = Ii;
    }
  }
  nboundary_nodes = k;


  ierr = VecDuplicate(rhs, &x);CHKERRQ(ierr);
  ierr = VecZeroEntries(x);CHKERRQ(ierr);
  ierr = PetscMalloc(nboundary_nodes*bs*sizeof(PetscInt), &boundary_indices);
  ierr = PetscMalloc(nboundary_nodes*bs*sizeof(PetscScalar), &boundary_values);
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b] = v;
      printf("%d %d %f\n", rank, Ii, v);
      v += 0.1;
    }
  }
  ierr = VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES); CHKERRQ(ierr);
  ierr = VecAssemblyBegin(x); CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x); CHKERRQ(ierr);

  /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x  and overwriting the boundary entries with boundary values */
  ierr = VecDuplicate(x, &y); CHKERRQ(ierr);
  ierr = MatMult(A, x, y); CHKERRQ(ierr);
  ierr = VecAYPX(y, -1.0, rhs); CHKERRQ(ierr);
  ierr = VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES); CHKERRQ(ierr);
  ierr = VecAssemblyBegin(y); CHKERRQ(ierr);
  ierr = VecAssemblyEnd(y); CHKERRQ(ierr);

  if (rank==0) printf("*** Matrix A and vector x:\n");
  ierr = MatView(A, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* put the following 4 lines back in to "fix" */
  //ierr = VecDuplicate(x, &y2); CHKERRQ(ierr);
  //ierr = VecDuplicate(x, &x2); CHKERRQ(ierr);
  //ierr = VecZeroEntries(x2); CHKERRQ(ierr);
  //ierr = MatMult(A, x2, y2); CHKERRQ(ierr);

  ierr = MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, 1.0, x, rhs);
  if (rank==0) printf("*** Vector rhs returned by MatZeroRowsColumns\n");
  ierr = VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = VecAXPY(y, -1.0, rhs); CHKERRQ(ierr);
  ierr = VecNorm(y, NORM_INFINITY, &v);
  if (rank==0) printf("*** Difference between rhs and y, inf-norm: %f\n", v);
  ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = VecDestroy(&x); CHKERRQ(ierr);
  ierr = VecDestroy(&y); CHKERRQ(ierr);
  ierr = VecDestroy(&rhs); CHKERRQ(ierr);
  ierr = MatDestroy(&A); CHKERRQ(ierr);

  ierr = PetscFinalize();
  return 0;
}

Reply via email to