Barry,
I've managed to replicate the problem with 3.7.4
snes/examples/tutorials/ex70.c. Basically I've added
KSPGetTotalIterations to main (file is attached):
$ diff -u ex70.c.bak ex70.c
--- ex70.c.bak2017-01-18 09:25:46.286174830 +0100
+++ ex70.c2017-01-18 09:03:40.904483434 +0100
@@ -669,6 +669,10 @@
KSP ksp;
PetscErrorCode ierr;
+ KSP *subksp;
+ PC pc;
+ PetscInt numsplit = 1, nusediter_vv, nusediter_pp;
+
ierr = PetscInitialize(&argc, &argv, NULL, help);CHKERRQ(ierr);
s.nx = 4;
s.ny = 6;
@@ -690,6 +694,13 @@
ierr = StokesSetupPC(&s, ksp);CHKERRQ(ierr);
ierr = KSPSolve(ksp, s.b, s.x);CHKERRQ(ierr);
+ ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
+ ierr = PCFieldSplitGetSubKSP(pc,&numsplit,&subksp); CHKERRQ(ierr);
+ ierr = KSPGetTotalIterations(subksp[0],&nusediter_vv); CHKERRQ(ierr);
+ ierr = KSPGetTotalIterations(subksp[1],&nusediter_pp); CHKERRQ(ierr);
+ ierr = PetscPrintf(PETSC_COMM_WORLD," total u solves = %i\n", nusediter_vv);
CHKERRQ(ierr);
+ ierr = PetscPrintf(PETSC_COMM_WORLD," total p solves = %i\n", nusediter_pp);
CHKERRQ(ierr);
+
/* don't trust, verify! */
ierr = StokesCalcResidual(&s);CHKERRQ(ierr);
ierr = StokesCalcError(&s);CHKERRQ(ierr);
Now run as follows:
$ mpirun -n 2 ./ex70 -ksp_type fgmres -pc_type fieldsplit -pc_fieldsplit_type
schur -pc_fieldsplit_schur_fact_type lower -fieldsplit_0_ksp_type gmres
-fieldsplit_0_pc_type bjacobi -fieldsplit_1_pc_type jacobi
-fieldsplit_1_inner_ksp_type preonly -fieldsplit_1_inner_pc_type jacobi
-fieldsplit_1_upper_ksp_type preonly -fieldsplit_1_upper_pc_type jacobi
-fieldsplit_0_ksp_converged_reason -fieldsplit_1_ksp_converged_reason
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 14
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 14
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 16
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 16
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 17
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 18
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 20
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 21
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 23
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 22
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 22
Linear fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 5
Linear fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 22
total u solves = 225
total p solves = 225
residual u = 9.67257e-06
residual p = 5.42082e-07
residual [u,p] = 9.68775e-06
discretization error u = 0.0106464
discretization error p = 1.85907
discretization error [u,p] = 1.8591
So here again the total of 225 is correct for p, but for u it
should be 60. Hope this helps you find the problem.
Chris
dr. ir. Christiaan Klaij | CFD Researcher | Research & Development
MARIN | T +31 317 49 33 44 | mailto:[email protected] | http://www.marin.nl
MARIN news:
http://www.marin.nl/web/News/News-items/Few-places-left-for-Offshore-and-Ship-hydrodynamics-courses.htm
________________________________________
From: Klaij, Christiaan
Sent: Tuesday, January 17, 2017 8:45 AM
To: Barry Smith
Cc: [email protected]
Subject: Re: [petsc-users] monitoring the convergence of fieldsplit 0 and 1
Well, that's it, all the rest was hard coded. Here's the relevant part of the
code:
CALL PCSetType(pc_system,PCFIELDSPLIT,ierr); CHKERRQ(ierr)
CALL PCFieldSplitSetType(pc_system,PC_COMPOSITE_SCHUR,ierr); CHKERRQ(ierr)
CALL PCFieldSplitSetIS(pc_system,"0",isgs(1),ierr); CHKERRQ(ierr)
CALL PCFieldSplitSetIS(pc_system,"1",isgs(2),ierr); CHKERRQ(ierr)
CALL
PCFieldSplitSetSchurFactType(pc_system,PC_FIELDSPLIT_SCHUR_FACT_FULL,ierr);CHKERRQ(ierr)
CALL
PCFieldSplitSetSchurPre(pc_system,PC_FIELDSPLIT_SCHUR_PRE_SELFP,PETSC_NULL_OBJECT,ierr);CHKERRQ(ierr)
CALL
KSPSetTolerances(ksp_system,tol,PETSC_DEFAULT_REAL,PETSC_DEFAULT_REAL,maxiter,ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_0_ksp_rtol","0.01",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_ksp_rtol","0.01",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_0_ksp_pc_side","right",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_ksp_pc_side","right",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_0_ksp_type","gmres",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_upper_ksp_type","preonly",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_upper_pc_type","jacobi",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_inner_ksp_type","preonly",ierr);
CHKERRQ(ierr)
CALL
PetscOptionsSetValue(PETSC_NULL_OBJECT,"-sys_fieldsplit_1_inner_pc_type","jacobi",ierr);
CHKERRQ(ierr)
________________________________________
From: Barry Smith <[email protected]>
Sent: Monday, January 16, 2017 9:28 PM
To: Klaij, Christiaan
Cc: [email protected]
Subject: Re: [petsc-users] monitoring the convergence of fieldsplit 0 and 1
Please send all the command line options you use.
> On Jan 16, 2017, at 1:47 AM, Klaij, Christiaan <[email protected]> wrote:
>
> Barry,
>
> Sure, here's the output with:
>
> -sys_ksp_view -sys_ksp_converged_reason
> -sys_fieldsplit_0_ksp_converged_reason -sys_fieldsplit_1_ksp_converged_reason
>
> (In my previous email, I rearranged 0 & 1 for easy summing.)
>
> Chris
>
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 1
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 22
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 1
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 2
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 6
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 2
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 3
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
> Linear sys_ solve converged due to CONVERGED_RTOL iterations 6
> KSP Object:(sys_) 1 MPI processes
> type: fgmres
> GMRES: restart=30, using Classical (unmodified) Gram-Schmidt
> Orthogonalization with no iterative refinement
> GMRES: happy breakdown tolerance 1e-30
> maximum iterations=300, initial guess is zero
> tolerances: relative=0.01, absolute=1e-50, divergence=10000.
> right preconditioning
> using UNPRECONDITIONED norm type for convergence test
> PC Object:(sys_) 1 MPI processes
> type: fieldsplit
> FieldSplit with Schur preconditioner, factorization FULL
> Preconditioner for the Schur complement formed from Sp, an assembled
> approximation to S, which uses (lumped, if requested) A00's diagonal's inverse
> Split info:
> Split number 0 Defined by IS
> Split number 1 Defined by IS
> KSP solver for A00 block
> KSP Object: (sys_fieldsplit_0_) 1 MPI processes
> type: gmres
> GMRES: restart=30, using Classical (unmodified) Gram-Schmidt
> Orthogonalization with no iterative refinement
> GMRES: happy breakdown tolerance 1e-30
> maximum iterations=10000, initial guess is zero
> tolerances: relative=0.01, absolute=1e-50, divergence=10000.
> right preconditioning
> using UNPRECONDITIONED norm type for convergence test
> PC Object: (sys_fieldsplit_0_) 1 MPI processes
> type: ilu
> ILU: out-of-place factorization
> 0 levels of fill
> tolerance for zero pivot 2.22045e-14
> matrix ordering: natural
> factor fill ratio given 1., needed 1.
> Factored matrix follows:
> Mat Object: 1 MPI processes
> type: seqaij
> rows=9600, cols=9600
> package used to perform factorization: petsc
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> linear system matrix = precond matrix:
> Mat Object: (sys_fieldsplit_0_) 1 MPI processes
> type: seqaij
> rows=9600, cols=9600
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> KSP solver for upper A00 in upper triangular factor
> KSP Object: (sys_fieldsplit_1_upper_) 1 MPI processes
> type: preonly
> maximum iterations=10000, initial guess is zero
> tolerances: relative=1e-05, absolute=1e-50, divergence=10000.
> left preconditioning
> using NONE norm type for convergence test
> PC Object: (sys_fieldsplit_1_upper_) 1 MPI processes
> type: jacobi
> linear system matrix = precond matrix:
> Mat Object: (sys_fieldsplit_0_) 1 MPI processes
> type: seqaij
> rows=9600, cols=9600
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> KSP solver for S = A11 - A10 inv(A00) A01
> KSP Object: (sys_fieldsplit_1_) 1 MPI processes
> type: gmres
> GMRES: restart=30, using Classical (unmodified) Gram-Schmidt
> Orthogonalization with no iterative refinement
> GMRES: happy breakdown tolerance 1e-30
> maximum iterations=10000, initial guess is zero
> tolerances: relative=0.01, absolute=1e-50, divergence=10000.
> right preconditioning
> using UNPRECONDITIONED norm type for convergence test
> PC Object: (sys_fieldsplit_1_) 1 MPI processes
> type: ilu
> ILU: out-of-place factorization
> 0 levels of fill
> tolerance for zero pivot 2.22045e-14
> matrix ordering: natural
> factor fill ratio given 1., needed 1.
> Factored matrix follows:
> Mat Object: 1 MPI processes
> type: seqaij
> rows=3200, cols=3200
> package used to perform factorization: petsc
> total: nonzeros=40404, allocated nonzeros=40404
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> linear system matrix followed by preconditioner matrix:
> Mat Object: (sys_fieldsplit_1_) 1 MPI processes
> type: schurcomplement
> rows=3200, cols=3200
> Schur complement A11 - A10 inv(A00) A01
> A11
> Mat Object: (sys_fieldsplit_1_) 1 MPI
> processes
> type: seqaij
> rows=3200, cols=3200
> total: nonzeros=40404, allocated nonzeros=40404
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> A10
> Mat Object: 1 MPI processes
> type: seqaij
> rows=3200, cols=9600
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> KSP of A00
> KSP Object: (sys_fieldsplit_1_inner_)
> 1 MPI processes
> type: preonly
> maximum iterations=10000, initial guess is zero
> tolerances: relative=1e-05, absolute=1e-50, divergence=10000.
> left preconditioning
> using NONE norm type for convergence test
> PC Object: (sys_fieldsplit_1_inner_)
> 1 MPI processes
> type: jacobi
> linear system matrix = precond matrix:
> Mat Object: (sys_fieldsplit_0_)
> 1 MPI processes
> type: seqaij
> rows=9600, cols=9600
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> A01
> Mat Object: 1 MPI processes
> type: seqaij
> rows=9600, cols=3200
> total: nonzeros=47280, allocated nonzeros=47280
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> Mat Object: 1 MPI processes
> type: seqaij
> rows=3200, cols=3200
> total: nonzeros=40404, allocated nonzeros=40404
> total number of mallocs used during MatSetValues calls =0
> not using I-node routines
> linear system matrix followed by preconditioner matrix:
> Mat Object: 1 MPI processes
> type: nest
> rows=12800, cols=12800
> Matrix object:
> type=nest, rows=2, cols=2
> MatNest structure:
> (0,0) : prefix="mom_", type=seqaij, rows=9600, cols=9600
> (0,1) : prefix="grad_", type=seqaij, rows=9600, cols=3200
> (1,0) : prefix="div_", type=seqaij, rows=3200, cols=9600
> (1,1) : prefix="stab_", type=seqaij, rows=3200, cols=3200
> Mat Object: 1 MPI processes
> type: nest
> rows=12800, cols=12800
> Matrix object:
> type=nest, rows=2, cols=2
> MatNest structure:
> (0,0) : prefix="sys_fieldsplit_0_", type=seqaij, rows=9600, cols=9600
> (0,1) : type=seqaij, rows=9600, cols=3200
> (1,0) : type=seqaij, rows=3200, cols=9600
> (1,1) : prefix="sys_fieldsplit_1_", type=seqaij, rows=3200, cols=3200
> nusediter_vv 37
> nusediter_pp 37
>
>
>
> dr. ir. Christiaan Klaij | CFD Researcher | Research & Development
> MARIN | T +31 317 49 33 44 | mailto:[email protected] | http://www.marin.nl
>
> MARIN news:
> http://www.marin.nl/web/News/News-items/The-Ocean-Cleanup-testing-continues.htm
>
> ________________________________________
> From: Barry Smith <[email protected]>
> Sent: Friday, January 13, 2017 7:51 PM
> To: Klaij, Christiaan
> Cc: [email protected]
> Subject: Re: [petsc-users] monitoring the convergence of fieldsplit 0 and 1
>
> Yes, I would have expected this to work. Could you send the output from
> -ksp_view in this case?
>
>
>> On Jan 13, 2017, at 3:46 AM, Klaij, Christiaan <[email protected]> wrote:
>>
>> Barry,
>>
>> It's been a while but I'm finally using this function in
>> 3.7.4. Is it supposed to work with fieldsplit? Here's why.
>>
>> I'm solving a Navier-Stokes system with fieldsplit (pc has one
>> velocity solve and one pressure solve) and trying to retrieve the
>> totals like this:
>>
>> CALL KSPSolve(ksp_system,rr_system,xx_system,ierr); CHKERRQ(ierr)
>> CALL PCFieldSplitGetSubKSP(pc_system,numsplit,subksp,ierr); CHKERRQ(ierr)
>> CALL KSPGetTotalIterations(subksp(1),nusediter_vv,ierr); CHKERRQ(ierr)
>> CALL KSPGetTotalIterations(subksp(2),nusediter_pp,ierr); CHKERRQ(ierr)
>> print *, 'nusediter_vv', nusediter_vv
>> print *, 'nusediter_pp', nusediter_pp
>>
>> Running the code shows this surprise:
>>
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 1
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 1
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 2
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 2
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 7
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
>> Linear sys_fieldsplit_0_ solve converged due to CONVERGED_RTOL iterations 8
>>
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 22
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 6
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 3
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
>> Linear sys_fieldsplit_1_ solve converged due to CONVERGED_RTOL iterations 2
>>
>> nusediter_vv 37
>> nusediter_pp 37
>>
>> So the value of nusediter_pp is indeed 37, but for nusediter_vv
>> it should be 66. Any idea what went wrong?
>>
>> Chris
>>
>>
>>
>> dr. ir. Christiaan Klaij | CFD Researcher | Research & Development
>> MARIN | T +31 317 49 33 44 | mailto:[email protected] | http://www.marin.nl
>>
>> MARIN news:
>> http://www.marin.nl/web/News/News-items/MARIN-wishes-you-a-challenging-inspiring-2017.htm
>>
>> ________________________________________
>> From: Barry Smith <[email protected]>
>> Sent: Saturday, April 11, 2015 12:27 AM
>> To: Klaij, Christiaan
>> Cc: [email protected]
>> Subject: Re: [petsc-users] monitoring the convergence of fieldsplit 0 and 1
>>
>> Chris,
>>
>> I have added KSPGetTotalIterations() to the branch
>> barry/add-ksp-total-iterations/master and next. After tests it will go into
>> master
>>
>> Barry
>>
>>> On Apr 10, 2015, at 8:07 AM, Klaij, Christiaan <[email protected]> wrote:
>>>
>>> Barry,
>>>
>>> Sure, I can call PCFieldSplitGetSubKSP() to get the fieldsplit_0
>>> ksp and then KSPGetIterationNumber, but what does this number
>>> mean?
>>>
>>> It appears to be the number of iterations of the last time that
>>> the subsystem was solved, right? If so, this corresponds to the
>>> last iteration of the coupled system, how about all the previous
>>> iterations?
>>>
>>> Chris
>>> ________________________________________
>>> From: Barry Smith <[email protected]>
>>> Sent: Friday, April 10, 2015 2:48 PM
>>> To: Klaij, Christiaan
>>> Cc: [email protected]
>>> Subject: Re: [petsc-users] monitoring the convergence of fieldsplit 0 and 1
>>>
>>> Chris,
>>>
>>> It appears you should call PCFieldSplitGetSubKSP() and then get the
>>> information you want out of the individual KSPs. If this doesn't work
>>> please let us know.
>>>
>>> Barry
>>>
>>>> On Apr 10, 2015, at 6:48 AM, Klaij, Christiaan <[email protected]> wrote:
>>>>
>>>> A question when using PCFieldSplit: for each linear iteration of
>>>> the system, how many iterations for fielsplit 0 and 1?
>>>>
>>>> One way to find out is to run with -ksp_monitor,
>>>> -fieldsplit_0_ksp_monitor and -fieldsplit_0_ksp_monitor. This
>>>> gives the complete convergence history.
>>>>
>>>> Another way, suggested by Matt, is to use -ksp_monitor,
>>>> -fieldsplit_0_ksp_converged_reason and
>>>> -fieldsplit_1_ksp_converged_reason. This gives only the totals
>>>> for fieldsplit 0 and 1 (but without saying for which one).
>>>>
>>>> Both ways require to somehow process the output, which is a bit
>>>> inconvenient. Could KSPGetResidualHistory perhaps return (some)
>>>> information on the subsystems' convergence for processing inside
>>>> the code?
>>>>
>>>> Chris
>>>>
>>>>
>>>> dr. ir. Christiaan Klaij
>>>> CFD Researcher
>>>> Research & Development
>>>> E mailto:[email protected]
>>>> T +31 317 49 33 44
>>>>
>>>>
>>>> MARIN
>>>> 2, Haagsteeg, P.O. Box 28, 6700 AA Wageningen, The Netherlands
>>>> T +31 317 49 39 11, F +31 317 49 32 45, I www.marin.nl
>>>>
>>>
>>
>
static char help[] = "Poiseuille flow problem. Viscous, laminar flow in a 2D channel with parabolic velocity\n\
profile and linear pressure drop, exact solution of the 2D Stokes\n";
/*---------------------------------------------------------------------------- */
/* M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S */
/*---------------------------------------------------------------------------- */
/* author : Christiaan M. Klaij */
/*---------------------------------------------------------------------------- */
/* */
/* Poiseuille flow problem. */
/* */
/* Viscous, laminar flow in a 2D channel with parabolic velocity */
/* profile and linear pressure drop, exact solution of the 2D Stokes */
/* equations. */
/* */
/* Discretized with the cell-centered finite-volume method on a */
/* Cartesian grid with co-located variables. Variables ordered as */
/* [u1...uN v1...vN p1...pN]^T. Matrix [A00 A01; A10, A11] solved with */
/* PCFIELDSPLIT. Lower factorization is used to mimick the Semi-Implicit */
/* Method for Pressure Linked Equations (SIMPLE) used as preconditioner */
/* instead of solver. */
/* */
/* Disclaimer: does not contain the pressure-weighed interpolation */
/* method needed to suppress spurious pressure modes in real-life */
/* problems. */
/* */
/* Usage: */
/* */
/* mpiexec -n 2 ./ex70 -nx 32 -ny 48 -ksp_type fgmres -pc_type fieldsplit -pc_fieldsplit_type schur -pc_fieldsplit_schur_fact_type lower -fieldsplit_1_pc_type none */
/* */
/* Runs with PCFIELDSPLIT on 32x48 grid, no PC for the Schur */
/* complement because A11 is zero. FGMRES is needed because */
/* PCFIELDSPLIT is a variable preconditioner. */
/* */
/* mpiexec -n 2 ./ex70 -nx 32 -ny 48 -ksp_type fgmres -pc_type fieldsplit -pc_fieldsplit_type schur -pc_fieldsplit_schur_fact_type lower -user_pc */
/* */
/* Same as above but with user defined PC for the true Schur */
/* complement. PC based on the SIMPLE-type approximation (inverse of */
/* A00 approximated by inverse of its diagonal). */
/* */
/* mpiexec -n 2 ./ex70 -nx 32 -ny 48 -ksp_type fgmres -pc_type fieldsplit -pc_fieldsplit_type schur -pc_fieldsplit_schur_fact_type lower -user_ksp */
/* */
/* Replace the true Schur complement with a user defined Schur */
/* complement based on the SIMPLE-type approximation. Same matrix is */
/* used as PC. */
/* */
/* mpiexec -n 2 ./ex70 -nx 32 -ny 48 -ksp_type fgmres -pc_type fieldsplit -pc_fieldsplit_type schur -pc_fieldsplit_schur_fact_type lower -fieldsplit_0_ksp_type gmres -fieldsplit_0_pc_type bjacobi -fieldsplit_1_pc_type jacobi -fieldsplit_1_inner_ksp_type preonly -fieldsplit_1_inner_pc_type jacobi -fieldsplit_1_upper_ksp_type preonly -fieldsplit_1_upper_pc_type jacobi */
/* */
/* Out-of-the-box SIMPLE-type preconditioning. The major advantage */
/* is that the user neither needs to provide the approximation of */
/* the Schur complement, nor the corresponding preconditioner. */
/* */
/*---------------------------------------------------------------------------- */
#include <petscksp.h>
typedef struct {
PetscBool userPC, userKSP; /* user defined preconditioner and matrix for the Schur complement */
PetscInt nx, ny; /* nb of cells in x- and y-direction */
PetscReal hx, hy; /* mesh size in x- and y-direction */
Mat A; /* block matrix */
Mat subA[4]; /* the four blocks */
Mat myS; /* the approximation of the Schur complement */
Vec x, b, y; /* solution, rhs and temporary vector */
IS isg[2]; /* index sets of split "0" and "1" */
} Stokes;
PetscErrorCode StokesSetupMatBlock00(Stokes*); /* setup the block Q */
PetscErrorCode StokesSetupMatBlock01(Stokes*); /* setup the block G */
PetscErrorCode StokesSetupMatBlock10(Stokes*); /* setup the block D (equal to the transpose of G) */
PetscErrorCode StokesSetupMatBlock11(Stokes*); /* setup the block C (equal to zero) */
PetscErrorCode StokesGetPosition(Stokes*, PetscInt, PetscInt*, PetscInt*); /* row number j*nx+i corresponds to position (i,j) in grid */
PetscErrorCode StokesStencilLaplacian(Stokes*, PetscInt, PetscInt, PetscInt*, PetscInt*, PetscScalar*); /* stencil of the Laplacian operator */
PetscErrorCode StokesStencilGradientX(Stokes*, PetscInt, PetscInt, PetscInt*, PetscInt*, PetscScalar*); /* stencil of the Gradient operator (x-component) */
PetscErrorCode StokesStencilGradientY(Stokes*, PetscInt, PetscInt, PetscInt*, PetscInt*, PetscScalar*); /* stencil of the Gradient operator (y-component) */
PetscErrorCode StokesRhs(Stokes*); /* rhs vector */
PetscErrorCode StokesRhsMomX(Stokes*, PetscInt, PetscInt, PetscScalar*); /* right hand side of velocity (x-component) */
PetscErrorCode StokesRhsMomY(Stokes*, PetscInt, PetscInt, PetscScalar*); /* right hand side of velocity (y-component) */
PetscErrorCode StokesRhsMass(Stokes*, PetscInt, PetscInt, PetscScalar*); /* right hand side of pressure */
PetscErrorCode StokesSetupApproxSchur(Stokes*); /* approximation of the Schur complement */
PetscErrorCode StokesExactSolution(Stokes*); /* exact solution vector */
PetscErrorCode StokesWriteSolution(Stokes*); /* write solution to file */
/* exact solution for the velocity (x-component, y-component is zero) */
PetscScalar StokesExactVelocityX(const PetscScalar y)
{
return 4.0*y*(1.0-y);
}
/* exact solution for the pressure */
PetscScalar StokesExactPressure(const PetscScalar x)
{
return 8.0*(2.0-x);
}
PetscErrorCode StokesSetupPC(Stokes *s, KSP ksp)
{
KSP *subksp;
PC pc;
PetscInt n = 1;
PetscErrorCode ierr;
PetscFunctionBeginUser;
ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
ierr = PCFieldSplitSetIS(pc, "0", s->isg[0]);CHKERRQ(ierr);
ierr = PCFieldSplitSetIS(pc, "1", s->isg[1]);CHKERRQ(ierr);
if (s->userPC) {
ierr = PCFieldSplitSetSchurPre(pc, PC_FIELDSPLIT_SCHUR_PRE_USER, s->myS);CHKERRQ(ierr);
}
if (s->userKSP) {
ierr = PCSetUp(pc);CHKERRQ(ierr);
ierr = PCFieldSplitGetSubKSP(pc, &n, &subksp);CHKERRQ(ierr);
ierr = KSPSetOperators(subksp[1], s->myS, s->myS);CHKERRQ(ierr);
ierr = PetscFree(subksp);CHKERRQ(ierr);
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesWriteSolution(Stokes *s)
{
PetscMPIInt size;
PetscInt n,i,j;
const PetscScalar *array;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* write data (*warning* only works sequential) */
MPI_Comm_size(MPI_COMM_WORLD,&size);
/*ierr = PetscPrintf(PETSC_COMM_WORLD," number of processors = %D\n",size);CHKERRQ(ierr);*/
if (size == 1) {
PetscViewer viewer;
ierr = VecGetArrayRead(s->x, &array);CHKERRQ(ierr);
ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD, "solution.dat", &viewer);CHKERRQ(ierr);
ierr = PetscViewerASCIIPrintf(viewer, "# x, y, u, v, p\n");CHKERRQ(ierr);
for (j = 0; j < s->ny; j++) {
for (i = 0; i < s->nx; i++) {
n = j*s->nx+i;
ierr = PetscViewerASCIIPrintf(viewer, "%.12g %.12g %.12g %.12g %.12g\n", (double)(i*s->hx+s->hx/2),(double)(j*s->hy+s->hy/2), (double)PetscRealPart(array[n]), (double)PetscRealPart(array[n+s->nx*s->ny]),(double)PetscRealPart(array[n+2*s->nx*s->ny]));CHKERRQ(ierr);
}
}
ierr = VecRestoreArrayRead(s->x, &array);CHKERRQ(ierr);
ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupIndexSets(Stokes *s)
{
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* the two index sets */
ierr = MatNestGetISs(s->A, s->isg, NULL);CHKERRQ(ierr);
/* ISView(isg[0],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
/* ISView(isg[1],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupVectors(Stokes *s)
{
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* solution vector x */
ierr = VecCreate(PETSC_COMM_WORLD, &s->x);CHKERRQ(ierr);
ierr = VecSetSizes(s->x, PETSC_DECIDE, 3*s->nx*s->ny);CHKERRQ(ierr);
ierr = VecSetType(s->x, VECMPI);CHKERRQ(ierr);
/* ierr = VecSetRandom(s->x, NULL);CHKERRQ(ierr); */
/* ierr = VecView(s->x, (PetscViewer) PETSC_VIEWER_DEFAULT);CHKERRQ(ierr); */
/* exact solution y */
ierr = VecDuplicate(s->x, &s->y);CHKERRQ(ierr);
ierr = StokesExactSolution(s);CHKERRQ(ierr);
/* ierr = VecView(s->y, (PetscViewer) PETSC_VIEWER_DEFAULT);CHKERRQ(ierr); */
/* rhs vector b */
ierr = VecDuplicate(s->x, &s->b);CHKERRQ(ierr);
ierr = StokesRhs(s);CHKERRQ(ierr);
/*ierr = VecView(s->b, (PetscViewer) PETSC_VIEWER_DEFAULT);CHKERRQ(ierr);*/
PetscFunctionReturn(0);
}
PetscErrorCode StokesGetPosition(Stokes *s, PetscInt row, PetscInt *i, PetscInt *j)
{
PetscInt n;
PetscFunctionBeginUser;
/* cell number n=j*nx+i has position (i,j) in grid */
n = row%(s->nx*s->ny);
*i = n%s->nx;
*j = (n-(*i))/s->nx;
PetscFunctionReturn(0);
}
PetscErrorCode StokesExactSolution(Stokes *s)
{
PetscInt row, start, end, i, j;
PetscScalar val;
Vec y0,y1;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* velocity part */
ierr = VecGetSubVector(s->y, s->isg[0], &y0);CHKERRQ(ierr);
ierr = VecGetOwnershipRange(y0, &start, &end);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row,&i,&j);CHKERRQ(ierr);
if (row < s->nx*s->ny) {
val = StokesExactVelocityX(j*s->hy+s->hy/2);
} else {
val = 0;
}
ierr = VecSetValue(y0, row, val, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = VecRestoreSubVector(s->y, s->isg[0], &y0);CHKERRQ(ierr);
/* pressure part */
ierr = VecGetSubVector(s->y, s->isg[1], &y1);CHKERRQ(ierr);
ierr = VecGetOwnershipRange(y1, &start, &end);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row, &i, &j);CHKERRQ(ierr);
val = StokesExactPressure(i*s->hx+s->hx/2);
ierr = VecSetValue(y1, row, val, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = VecRestoreSubVector(s->y, s->isg[1], &y1);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesRhs(Stokes *s)
{
PetscInt row, start, end, i, j;
PetscScalar val;
Vec b0,b1;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* velocity part */
ierr = VecGetSubVector(s->b, s->isg[0], &b0);CHKERRQ(ierr);
ierr = VecGetOwnershipRange(b0, &start, &end);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row, &i, &j);CHKERRQ(ierr);
if (row < s->nx*s->ny) {
ierr = StokesRhsMomX(s, i, j, &val);CHKERRQ(ierr);
} else if (row < 2*s->nx*s->ny) {
ierr = StokesRhsMomY(s, i, j, &val);CHKERRQ(ierr);
}
ierr = VecSetValue(b0, row, val, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = VecRestoreSubVector(s->b, s->isg[0], &b0);CHKERRQ(ierr);
/* pressure part */
ierr = VecGetSubVector(s->b, s->isg[1], &b1);CHKERRQ(ierr);
ierr = VecGetOwnershipRange(b1, &start, &end);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row, &i, &j);CHKERRQ(ierr);
ierr = StokesRhsMass(s, i, j, &val);CHKERRQ(ierr);
ierr = VecSetValue(b1, row, val, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = VecRestoreSubVector(s->b, s->isg[1], &b1);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupMatBlock00(Stokes *s)
{
PetscInt row, start, end, size, i, j;
PetscInt cols[5];
PetscScalar vals[5];
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* A[0] is 2N-by-2N */
ierr = MatCreate(PETSC_COMM_WORLD,&s->subA[0]);CHKERRQ(ierr);
ierr = MatSetOptionsPrefix(s->subA[0],"a00_");CHKERRQ(ierr);
ierr = MatSetSizes(s->subA[0],PETSC_DECIDE,PETSC_DECIDE,2*s->nx*s->ny,2*s->nx*s->ny);CHKERRQ(ierr);
ierr = MatSetType(s->subA[0],MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(s->subA[0],5,NULL,5,NULL);CHKERRQ(ierr);
ierr = MatGetOwnershipRange(s->subA[0], &start, &end);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row, &i, &j);CHKERRQ(ierr);
/* first part: rows 0 to (nx*ny-1) */
ierr = StokesStencilLaplacian(s, i, j, &size, cols, vals);CHKERRQ(ierr);
/* second part: rows (nx*ny) to (2*nx*ny-1) */
if (row >= s->nx*s->ny) {
for (i = 0; i < 5; i++) cols[i] = cols[i] + s->nx*s->ny;
}
for (i = 0; i < 5; i++) vals[i] = -1.0*vals[i]; /* dynamic viscosity coef mu=-1 */
ierr = MatSetValues(s->subA[0], 1, &row, size, cols, vals, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = MatAssemblyBegin(s->subA[0], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(s->subA[0], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupMatBlock01(Stokes *s)
{
PetscInt row, start, end, size, i, j;
PetscInt cols[5];
PetscScalar vals[5];
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* A[1] is 2N-by-N */
ierr = MatCreate(PETSC_COMM_WORLD, &s->subA[1]);CHKERRQ(ierr);
ierr = MatSetOptionsPrefix(s->subA[1],"a01_");CHKERRQ(ierr);
ierr = MatSetSizes(s->subA[1],PETSC_DECIDE,PETSC_DECIDE,2*s->nx*s->ny,s->nx*s->ny);CHKERRQ(ierr);
ierr = MatSetType(s->subA[1],MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(s->subA[1],5,NULL,5,NULL);CHKERRQ(ierr);
ierr = MatGetOwnershipRange(s->subA[1],&start,&end);CHKERRQ(ierr);
ierr = MatSetOption(s->subA[1],MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
for (row = start; row < end; row++) {
ierr = StokesGetPosition(s, row, &i, &j);CHKERRQ(ierr);
/* first part: rows 0 to (nx*ny-1) */
if (row < s->nx*s->ny) {
ierr = StokesStencilGradientX(s, i, j, &size, cols, vals);CHKERRQ(ierr);
}
/* second part: rows (nx*ny) to (2*nx*ny-1) */
else {
ierr = StokesStencilGradientY(s, i, j, &size, cols, vals);CHKERRQ(ierr);
}
ierr = MatSetValues(s->subA[1], 1, &row, size, cols, vals, INSERT_VALUES);CHKERRQ(ierr);
}
ierr = MatAssemblyBegin(s->subA[1], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(s->subA[1], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupMatBlock10(Stokes *s)
{
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* A[2] is minus transpose of A[1] */
ierr = MatTranspose(s->subA[1], MAT_INITIAL_MATRIX, &s->subA[2]);CHKERRQ(ierr);
ierr = MatScale(s->subA[2], -1.0);CHKERRQ(ierr);
ierr = MatSetOptionsPrefix(s->subA[2], "a10_");CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupMatBlock11(Stokes *s)
{
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* A[3] is N-by-N null matrix */
ierr = MatCreate(PETSC_COMM_WORLD, &s->subA[3]);CHKERRQ(ierr);
ierr = MatSetOptionsPrefix(s->subA[3], "a11_");CHKERRQ(ierr);
ierr = MatSetSizes(s->subA[3], PETSC_DECIDE, PETSC_DECIDE, s->nx*s->ny, s->nx*s->ny);CHKERRQ(ierr);
ierr = MatSetType(s->subA[3], MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(s->subA[3], 0, NULL, 0, NULL);CHKERRQ(ierr);
ierr = MatAssemblyBegin(s->subA[3], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(s->subA[3], MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupApproxSchur(Stokes *s)
{
Vec diag;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* Schur complement approximation: myS = A11 - A10 diag(A00)^(-1) A01 */
/* note: A11 is zero */
/* note: in real life this matrix would be build directly, */
/* i.e. without MatMatMult */
/* inverse of diagonal of A00 */
ierr = VecCreate(PETSC_COMM_WORLD,&diag);CHKERRQ(ierr);
ierr = VecSetSizes(diag,PETSC_DECIDE,2*s->nx*s->ny);CHKERRQ(ierr);
ierr = VecSetType(diag,VECMPI);CHKERRQ(ierr);
ierr = MatGetDiagonal(s->subA[0],diag);
ierr = VecReciprocal(diag);
/* compute: - A10 diag(A00)^(-1) A01 */
ierr = MatDiagonalScale(s->subA[1],diag,NULL);CHKERRQ(ierr); /* (*warning* overwrites subA[1]) */
ierr = MatMatMult(s->subA[2],s->subA[1],MAT_INITIAL_MATRIX,PETSC_DEFAULT,&s->myS);CHKERRQ(ierr);
ierr = MatScale(s->myS,-1.0);CHKERRQ(ierr);
/* restore A10 */
ierr = MatGetDiagonal(s->subA[0],diag);CHKERRQ(ierr);
ierr = MatDiagonalScale(s->subA[1],diag,NULL);CHKERRQ(ierr);
ierr = VecDestroy(&diag);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesSetupMatrix(Stokes *s)
{
PetscErrorCode ierr;
PetscFunctionBeginUser;
ierr = StokesSetupMatBlock00(s);CHKERRQ(ierr);
ierr = StokesSetupMatBlock01(s);CHKERRQ(ierr);
ierr = StokesSetupMatBlock10(s);CHKERRQ(ierr);
ierr = StokesSetupMatBlock11(s);CHKERRQ(ierr);
ierr = MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, s->subA, &s->A);CHKERRQ(ierr);
ierr = StokesSetupApproxSchur(s);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesStencilLaplacian(Stokes *s, PetscInt i, PetscInt j, PetscInt *size, PetscInt *cols, PetscScalar *vals)
{
PetscInt p =j*s->nx+i, w=p-1, e=p+1, s2=p-s->nx, n=p+s->nx;
PetscScalar ae=s->hy/s->hx, aeb=0;
PetscScalar aw=s->hy/s->hx, awb=s->hy/(s->hx/2);
PetscScalar as=s->hx/s->hy, asb=s->hx/(s->hy/2);
PetscScalar an=s->hx/s->hy, anb=s->hx/(s->hy/2);
PetscFunctionBeginUser;
if (i==0 && j==0) { /* south-west corner */
*size =3;
cols[0]=p; vals[0]=-(ae+awb+asb+an);
cols[1]=e; vals[1]=ae;
cols[2]=n; vals[2]=an;
} else if (i==0 && j==s->ny-1) { /* north-west corner */
*size =3;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(ae+awb+as+anb);
cols[2]=e; vals[2]=ae;
} else if (i==s->nx-1 && j==0) { /* south-east corner */
*size =3;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(aeb+aw+asb+an);
cols[2]=n; vals[2]=an;
} else if (i==s->nx-1 && j==s->ny-1) { /* north-east corner */
*size =3;
cols[0]=s2; vals[0]=as;
cols[1]=w; vals[1]=aw;
cols[2]=p; vals[2]=-(aeb+aw+as+anb);
} else if (i==0) { /* west boundary */
*size =4;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(ae+awb+as+an);
cols[2]=e; vals[2]=ae;
cols[3]=n; vals[3]=an;
} else if (i==s->nx-1) { /* east boundary */
*size =4;
cols[0]=s2; vals[0]=as;
cols[1]=w; vals[1]=aw;
cols[2]=p; vals[2]=-(aeb+aw+as+an);
cols[3]=n; vals[3]=an;
} else if (j==0) { /* south boundary */
*size =4;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(ae+aw+asb+an);
cols[2]=e; vals[2]=ae;
cols[3]=n; vals[3]=an;
} else if (j==s->ny-1) { /* north boundary */
*size =4;
cols[0]=s2; vals[0]=as;
cols[1]=w; vals[1]=aw;
cols[2]=p; vals[2]=-(ae+aw+as+anb);
cols[3]=e; vals[3]=ae;
} else { /* interior */
*size =5;
cols[0]=s2; vals[0]=as;
cols[1]=w; vals[1]=aw;
cols[2]=p; vals[2]=-(ae+aw+as+an);
cols[3]=e; vals[3]=ae;
cols[4]=n; vals[4]=an;
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesStencilGradientX(Stokes *s, PetscInt i, PetscInt j, PetscInt *size, PetscInt *cols, PetscScalar *vals)
{
PetscInt p =j*s->nx+i, w=p-1, e=p+1;
PetscScalar ae= s->hy/2, aeb=s->hy;
PetscScalar aw=-s->hy/2, awb=0;
PetscFunctionBeginUser;
if (i==0 && j==0) { /* south-west corner */
*size =2;
cols[0]=p; vals[0]=-(ae+awb);
cols[1]=e; vals[1]=ae;
} else if (i==0 && j==s->ny-1) { /* north-west corner */
*size =2;
cols[0]=p; vals[0]=-(ae+awb);
cols[1]=e; vals[1]=ae;
} else if (i==s->nx-1 && j==0) { /* south-east corner */
*size =2;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(aeb+aw);
} else if (i==s->nx-1 && j==s->ny-1) { /* north-east corner */
*size =2;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(aeb+aw);
} else if (i==0) { /* west boundary */
*size =2;
cols[0]=p; vals[0]=-(ae+awb);
cols[1]=e; vals[1]=ae;
} else if (i==s->nx-1) { /* east boundary */
*size =2;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(aeb+aw);
} else if (j==0) { /* south boundary */
*size =3;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(ae+aw);
cols[2]=e; vals[2]=ae;
} else if (j==s->ny-1) { /* north boundary */
*size =3;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(ae+aw);
cols[2]=e; vals[2]=ae;
} else { /* interior */
*size =3;
cols[0]=w; vals[0]=aw;
cols[1]=p; vals[1]=-(ae+aw);
cols[2]=e; vals[2]=ae;
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesStencilGradientY(Stokes *s, PetscInt i, PetscInt j, PetscInt *size, PetscInt *cols, PetscScalar *vals)
{
PetscInt p =j*s->nx+i, s2=p-s->nx, n=p+s->nx;
PetscScalar as=-s->hx/2, asb=0;
PetscScalar an= s->hx/2, anb=0;
PetscFunctionBeginUser;
if (i==0 && j==0) { /* south-west corner */
*size =2;
cols[0]=p; vals[0]=-(asb+an);
cols[1]=n; vals[1]=an;
} else if (i==0 && j==s->ny-1) { /* north-west corner */
*size =2;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+anb);
} else if (i==s->nx-1 && j==0) { /* south-east corner */
*size =2;
cols[0]=p; vals[0]=-(asb+an);
cols[1]=n; vals[1]=an;
} else if (i==s->nx-1 && j==s->ny-1) { /* north-east corner */
*size =2;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+anb);
} else if (i==0) { /* west boundary */
*size =3;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+an);
cols[2]=n; vals[2]=an;
} else if (i==s->nx-1) { /* east boundary */
*size =3;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+an);
cols[2]=n; vals[2]=an;
} else if (j==0) { /* south boundary */
*size =2;
cols[0]=p; vals[0]=-(asb+an);
cols[1]=n; vals[1]=an;
} else if (j==s->ny-1) { /* north boundary */
*size =2;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+anb);
} else { /* interior */
*size =3;
cols[0]=s2; vals[0]=as;
cols[1]=p; vals[1]=-(as+an);
cols[2]=n; vals[2]=an;
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesRhsMomX(Stokes *s, PetscInt i, PetscInt j, PetscScalar *val)
{
PetscScalar y = j*s->hy+s->hy/2;
PetscScalar awb = s->hy/(s->hx/2);
PetscFunctionBeginUser;
if (i == 0) { /* west boundary */
*val = awb*StokesExactVelocityX(y);
} else {
*val = 0.0;
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesRhsMomY(Stokes *s, PetscInt i, PetscInt j, PetscScalar *val)
{
PetscFunctionBeginUser;
*val = 0.0;
PetscFunctionReturn(0);
}
PetscErrorCode StokesRhsMass(Stokes *s, PetscInt i, PetscInt j, PetscScalar *val)
{
PetscScalar y = j*s->hy+s->hy/2;
PetscScalar aeb = s->hy;
PetscFunctionBeginUser;
if (i == 0) { /* west boundary */
*val = aeb*StokesExactVelocityX(y);
} else {
*val = 0.0;
}
PetscFunctionReturn(0);
}
PetscErrorCode StokesCalcResidual(Stokes *s)
{
PetscReal val;
Vec b0, b1;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* residual Ax-b (*warning* overwrites b) */
ierr = VecScale(s->b, -1.0);CHKERRQ(ierr);
ierr = MatMultAdd(s->A, s->x, s->b, s->b);CHKERRQ(ierr);
/* ierr = VecView(s->b, (PetscViewer)PETSC_VIEWER_DEFAULT);CHKERRQ(ierr); */
/* residual velocity */
ierr = VecGetSubVector(s->b, s->isg[0], &b0);CHKERRQ(ierr);
ierr = VecNorm(b0, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," residual u = %g\n",(double)val);CHKERRQ(ierr);
ierr = VecRestoreSubVector(s->b, s->isg[0], &b0);CHKERRQ(ierr);
/* residual pressure */
ierr = VecGetSubVector(s->b, s->isg[1], &b1);CHKERRQ(ierr);
ierr = VecNorm(b1, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," residual p = %g\n",(double)val);CHKERRQ(ierr);
ierr = VecRestoreSubVector(s->b, s->isg[1], &b1);CHKERRQ(ierr);
/* total residual */
ierr = VecNorm(s->b, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," residual [u,p] = %g\n", (double)val);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
PetscErrorCode StokesCalcError(Stokes *s)
{
PetscScalar scale = PetscSqrtReal((double)s->nx*s->ny);
PetscReal val;
Vec y0, y1;
PetscErrorCode ierr;
PetscFunctionBeginUser;
/* error y-x */
ierr = VecAXPY(s->y, -1.0, s->x);CHKERRQ(ierr);
/* ierr = VecView(s->y, (PetscViewer)PETSC_VIEWER_DEFAULT);CHKERRQ(ierr); */
/* error in velocity */
ierr = VecGetSubVector(s->y, s->isg[0], &y0);CHKERRQ(ierr);
ierr = VecNorm(y0, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," discretization error u = %g\n",(double)(PetscRealPart(val/scale)));CHKERRQ(ierr);
ierr = VecRestoreSubVector(s->y, s->isg[0], &y0);CHKERRQ(ierr);
/* error in pressure */
ierr = VecGetSubVector(s->y, s->isg[1], &y1);CHKERRQ(ierr);
ierr = VecNorm(y1, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," discretization error p = %g\n",(double)(PetscRealPart(val/scale)));CHKERRQ(ierr);
ierr = VecRestoreSubVector(s->y, s->isg[1], &y1);CHKERRQ(ierr);
/* total error */
ierr = VecNorm(s->y, NORM_2, &val);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," discretization error [u,p] = %g\n", (double)PetscRealPart((val/scale)));CHKERRQ(ierr);
PetscFunctionReturn(0);
}
int main(int argc, char **argv)
{
Stokes s;
KSP ksp;
PetscErrorCode ierr;
KSP *subksp;
PC pc;
PetscInt numsplit = 1, nusediter_vv, nusediter_pp;
ierr = PetscInitialize(&argc, &argv, NULL, help);CHKERRQ(ierr);
s.nx = 4;
s.ny = 6;
ierr = PetscOptionsGetInt(NULL,NULL, "-nx", &s.nx, NULL);CHKERRQ(ierr);
ierr = PetscOptionsGetInt(NULL,NULL, "-ny", &s.ny, NULL);CHKERRQ(ierr);
s.hx = 2.0/s.nx;
s.hy = 1.0/s.ny;
s.userPC = s.userKSP = PETSC_FALSE;
ierr = PetscOptionsHasName(NULL,NULL, "-user_pc", &s.userPC);CHKERRQ(ierr);
ierr = PetscOptionsHasName(NULL,NULL, "-user_ksp", &s.userKSP);CHKERRQ(ierr);
ierr = StokesSetupMatrix(&s);CHKERRQ(ierr);
ierr = StokesSetupIndexSets(&s);CHKERRQ(ierr);
ierr = StokesSetupVectors(&s);CHKERRQ(ierr);
ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);CHKERRQ(ierr);
ierr = KSPSetOperators(ksp, s.A, s.A);CHKERRQ(ierr);
ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
ierr = StokesSetupPC(&s, ksp);CHKERRQ(ierr);
ierr = KSPSolve(ksp, s.b, s.x);CHKERRQ(ierr);
ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
ierr = PCFieldSplitGetSubKSP(pc,&numsplit,&subksp); CHKERRQ(ierr);
ierr = KSPGetTotalIterations(subksp[0],&nusediter_vv); CHKERRQ(ierr);
ierr = KSPGetTotalIterations(subksp[1],&nusediter_pp); CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," total u solves = %i\n", nusediter_vv); CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD," total p solves = %i\n", nusediter_pp); CHKERRQ(ierr);
/* don't trust, verify! */
ierr = StokesCalcResidual(&s);CHKERRQ(ierr);
ierr = StokesCalcError(&s);CHKERRQ(ierr);
ierr = StokesWriteSolution(&s);CHKERRQ(ierr);
ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
ierr = MatDestroy(&s.subA[0]);CHKERRQ(ierr);
ierr = MatDestroy(&s.subA[1]);CHKERRQ(ierr);
ierr = MatDestroy(&s.subA[2]);CHKERRQ(ierr);
ierr = MatDestroy(&s.subA[3]);CHKERRQ(ierr);
ierr = MatDestroy(&s.A);CHKERRQ(ierr);
ierr = VecDestroy(&s.x);CHKERRQ(ierr);
ierr = VecDestroy(&s.b);CHKERRQ(ierr);
ierr = VecDestroy(&s.y);CHKERRQ(ierr);
ierr = MatDestroy(&s.myS);CHKERRQ(ierr);
ierr = PetscFinalize();CHKERRQ(ierr);
return 0;
}