Bob,

On Thu, 13 Sep 2018 at 18:41, Bob Peterson <[email protected]> wrote:
> Before this patch, any gfs2 withdraw caused the dlm lockspace to
> be unmounted with option 2, which tells dlm to do a clean unmount.
> However, if gfs2 does a withdraw we don't want dlm to think the
> file system is cleanly unmounted and thus fail to set the proper
> sequence number to get the journal replayed. This patch causes
> the dlm to go through a forced shutdown, which ought to force a
> journal replay in withdraw situations.

I see that this change will cause release_lockspace() to skip calling
do_uevent() in the force_replay case. Could you please explain how
this fixes the error scenario?

> Signed-off-by: Bob Peterson <[email protected]>
> ---
>  fs/gfs2/glock.h      | 2 +-
>  fs/gfs2/lock_dlm.c   | 4 ++--
>  fs/gfs2/ops_fstype.c | 2 +-
>  fs/gfs2/util.c       | 2 +-
>  4 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index 5e12220cc0c2..b46b34a4fef0 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -126,7 +126,7 @@ struct lm_lockops {
>         void (*lm_first_done) (struct gfs2_sbd *sdp);
>         void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
>                                     unsigned int result);
> -       void (*lm_unmount) (struct gfs2_sbd *sdp);
> +       void (*lm_unmount) (struct gfs2_sbd *sdp, int force_replay);

Can you make this a bool and use true / false instead?

>         void (*lm_withdraw) (struct gfs2_sbd *sdp);
>         void (*lm_put_lock) (struct gfs2_glock *gl);
>         int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
> index ac7caa267ed6..dd31b37d7814 100644
> --- a/fs/gfs2/lock_dlm.c
> +++ b/fs/gfs2/lock_dlm.c
> @@ -1304,7 +1304,7 @@ static void gdlm_first_done(struct gfs2_sbd *sdp)
>                 fs_err(sdp, "mount first_done error %d\n", error);
>  }
>
> -static void gdlm_unmount(struct gfs2_sbd *sdp)
> +static void gdlm_unmount(struct gfs2_sbd *sdp, int force_replay)
>  {
>         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
>
> @@ -1321,7 +1321,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
>         /* mounted_lock and control_lock will be purged in dlm recovery */
>  release:
>         if (ls->ls_dlm) {
> -               dlm_release_lockspace(ls->ls_dlm, 2);
> +               dlm_release_lockspace(ls->ls_dlm, force_replay ? 3 : 2);
>                 ls->ls_dlm = NULL;
>         }
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index c2469833b4fb..cdb0e4a71dbf 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -1000,7 +1000,7 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
>         const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
>         if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
>             lm->lm_unmount)
> -               lm->lm_unmount(sdp);
> +               lm->lm_unmount(sdp, 0);
>  }
>
>  static int wait_on_journal(struct gfs2_sbd *sdp)
> diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
> index b072b10fb635..b5a6e958432d 100644
> --- a/fs/gfs2/util.c
> +++ b/fs/gfs2/util.c
> @@ -69,7 +69,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
>
>                 if (lm->lm_unmount) {
>                         fs_err(sdp, "telling LM to unmount\n");
> -                       lm->lm_unmount(sdp);
> +                       lm->lm_unmount(sdp, 1);
>                 }
>                 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
>                 fs_err(sdp, "withdrawn\n");
>

Thanks,
Andreas

Reply via email to