Sometimes when gfs2 cancels a glock request, dlm needs time to take the request off its Conversion queue. During that time, we get -EBUSY from dlm, which confuses the glock state machine. Ideally we want dlm to not return -EBUSY but wait until the operation has completed. This is a stop-gap measure until dlm has a solution in place.
Signed-off-by: Bob Peterson <rpete...@redhat.com> --- fs/gfs2/lock_dlm.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 50578f881e6d..bf03c77b6757 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -258,7 +258,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - int req; + int req, ret; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; @@ -278,9 +278,12 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, /* * Submit the actual lock request. */ - - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, - GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + do { + ret = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, + GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, + gdlm_bast); + } while (ret == -EBUSY); + return ret; } static void gdlm_put_lock(struct gfs2_glock *gl) @@ -312,8 +315,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl) return; } - error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, - NULL, gl); + do { + error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, + DLM_LKF_VALBLK, NULL, gl); + } while (error == -EBUSY); + if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, @@ -506,7 +512,9 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + do { + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + } while (error == -EBUSY); if (error) { fs_err(sdp, "%s lkid %x error %d\n", name, lksb->sb_lkid, error); -- 2.34.1