In  PCSetUp_MG, processors can diverge in the red line, DMGetDMKSPWrite
 for (i=n-2; i>-1; i--) {
      DMKSP     kdm;
      PetscBool dmhasrestrict, dmhasinject;
      ierr = KSPSetDM(mglevels[i]->smoothd,dms[i]);CHKERRQ(ierr);
      if (!needRestricts) {ierr = 
KSPSetDMActive(mglevels[i]->smoothd,PETSC_FALSE);CHKERRQ(ierr);}
      if (mglevels[i]->smoothd != mglevels[i]->smoothu) {
        ierr = KSPSetDM(mglevels[i]->smoothu,dms[i]);CHKERRQ(ierr);
        if (!needRestricts) {ierr = 
KSPSetDMActive(mglevels[i]->smoothu,PETSC_FALSE);CHKERRQ(ierr);}
      }
      ierr = DMGetDMKSPWrite(dms[i],&kdm);CHKERRQ(ierr);
--Junchao Zhang


On Fri, Jun 14, 2019 at 12:43 PM Junchao Zhang 
<[email protected]<mailto:[email protected]>> wrote:
Hello,
   I am investigating petsc issue 
306<https://bitbucket.org/petsc/petsc/issues/306/snes-example-ex9-sometimes-fails-in>.
 One can produce the problem with src/snes/examples/tutorials/ex9.c and mpirun 
-n 3 ./ex9 -snes_grid_sequence 3 -snes_converged_reason -pc_type mg
  The program can run to finish,  or crash, or hang.  The error appears either 
in PetscGatherMessageLengths or PetscCommBuildTwoSided().  From from my 
debugging, the following routine is suspicious.  It claims not collective, but 
it might call DMKSPCreate, which can indirectly call a collective 
MPI_Comm_dup().
  Can someone familiar with the code explain it?  Thanks.

  /*@C
   DMGetDMKSPWrite - get write access to private DMKSP context from a DM

   Not Collective

   Input Argument:
.  dm - DM to be used with KSP

   Output Argument:
.  kspdm - private DMKSP context

   Level: developer

.seealso: DMGetDMKSP()
@*/
PetscErrorCode DMGetDMKSPWrite(DM dm,DMKSP *kspdm)
{
  PetscErrorCode ierr;
  DMKSP          kdm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm,DM_CLASSID,1);
  ierr = DMGetDMKSP(dm,&kdm);CHKERRQ(ierr);
  if (!kdm->originaldm) kdm->originaldm = dm;
  if (kdm->originaldm != dm) {  /* Copy on write */
    DMKSP oldkdm = kdm;
    ierr      = PetscInfo(dm,"Copying DMKSP due to write\n");CHKERRQ(ierr);
    ierr      = 
DMKSPCreate(PetscObjectComm((PetscObject)dm),&kdm);CHKERRQ(ierr);
    ierr      = DMKSPCopy(oldkdm,kdm);CHKERRQ(ierr);
    ierr      = DMKSPDestroy((DMKSP*)&dm->dmksp);CHKERRQ(ierr);
    dm->dmksp = (PetscObject)kdm;
  }
  *kspdm = kdm;
  PetscFunctionReturn(0);
}

The calling stack is
...
#20 0x00007fcf3e3d23e2 in PetscCommDuplicate 
(comm_in=comm_in@entry=-2080374782, comm_out=comm_out@entry=0x557a18304db0, 
first_tag=first_tag@entry=0x557a18304de4) at 
/home/jczhang/petsc/src/sys/objects/tagm.c:162
#21 0x00007fcf3e3d7730 in PetscHeaderCreate_Private (h=0x557a18304d70, 
classid=<optimized out>, class_name=class_name@entry=0x7fcf3f7f762a "DMKSP", 
descr=descr@entry=0x7fcf3f7f762a "DMKSP",  mansec=mansec@entry=0x7fcf3f7f762a 
"DMKSP", comm=comm@entry=-2080374782, destroy=0x7fcf3f350570 <DMKSPDestroy>, 
view=0x0)    at /home/jczhang/petsc/src/sys/objects/inherit.c:64
#22 0x00007fcf3f3504c9 in DMKSPCreate (comm=-2080374782, 
kdm=kdm@entry=0x7ffc1d4d00f8)     at 
/home/jczhang/petsc/src/ksp/ksp/interface/dmksp.c:24
#23 0x00007fcf3f35150f in DMGetDMKSPWrite (dm=0x557a18541a10, 
kspdm=kspdm@entry=0x7ffc1d4d01a8)     at 
/home/jczhang/petsc/src/ksp/ksp/interface/dmksp.c:155
#24 0x00007fcf3f1bb20e in PCSetUp_MG (pc=<optimized out>) at 
/home/jczhang/petsc/src/ksp/pc/impls/mg/mg.c:682
#25 0x00007fcf3f204bea in PCSetUp (pc=0x557a17dc1860) at 
/home/jczhang/petsc/src/ksp/pc/interface/precon.c:894
#26 0x00007fcf3f32ba4b in KSPSetUp (ksp=0x557a17d73500) at 
/home/jczhang/petsc/src/ksp/ksp/interface/itfunc.c:377
#27 0x00007fcf3f41e43e in SNESSolve_VINEWTONRSLS (snes=0x557a17bff210) at 
/home/jczhang/petsc/src/snes/impls/vi/rs/virs.c:502
#28 0x00007fcf3f3fa191 in SNESSolve (snes=0x557a17bff210, b=0x0, x=<optimized 
out>)     at /home/jczhang/petsc/src/snes/interface/snes.c:4433
#29 0x0000557a16432095 in main (argc=<optimized out>, argv=<optimized out>)     
at /home/jczhang/petsc/src/snes/examples/tutorials/ex9.c:105

--Junchao Zhang

Reply via email to