Hi,

This patch introduces a new glock workqueue, gfs2_final_workqueue. The workqueue's only job is to call dlm's unlock. This prevents gfs2_evict_inode from calling dlm directly, which might block waiting for DLM to unlock, which in turn may be waiting for something like a fence operation. By moving the final unlock to its own work queue, the final put happens later, which allows the shrinker to make progress and free memory, avoiding a livelock.
Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 795c2f3..9d0f3d5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
 
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
+static struct workqueue_struct *gfs2_final_workqueue;
 struct workqueue_struct *gfs2_delete_workqueue;
 static LIST_HEAD(lru_list);
 static atomic_t lru_count = ATOMIC_INIT(0);
@@ -152,6 +153,20 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
 	spin_unlock(&lru_lock);
 }
 
+/* The purpose of this function is to tell dlm when a glock is not needed.
+ * We can't do this directly from gfs2_glock_put because dlm may block while
+ * waiting for a fence operation to complete. But the fence may block on
+ * memory allocation, which may block on the shrinker, which may block on
+ * the evict code. So the buck stops here.
+ */
+static void final_work_func(struct work_struct *work)
+{
+	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_final);
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+}
+
 /**
  * gfs2_glock_put() - Decrement reference count on glock
  * @gl: The glock to put
@@ -160,7 +175,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
 
 void gfs2_glock_put(struct gfs2_glock *gl)
 {
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	struct address_space *mapping = gfs2_glock2aspace(gl);
 
 	if (lockref_put_or_lock(&gl->gl_lockref))
@@ -174,7 +188,8 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
 	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
 	trace_gfs2_glock_put(gl);
-	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+	/* Defer the final dlm unlock to its own workqueue (see final_work_func).
+	 * Keep the queue_work() side effect out of BUG_ON(): with CONFIG_BUG=n
+	 * the BUG_ON() condition may be compiled away, and the glock would
+	 * never be handed back to dlm. Failure to queue here means gl_final
+	 * was already pending, i.e. a double final put - a fatal logic error.
+	 */
+	if (!queue_work(gfs2_final_workqueue, &gl->gl_final))
+		BUG();
 }
 
 /**
@@ -700,6 +714,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); + INIT_WORK(&gl->gl_final, final_work_func); mapping = gfs2_glock2aspace(gl); if (mapping) { @@ -1772,6 +1787,14 @@ int __init gfs2_glock_init(void) rhashtable_destroy(&gl_hash_table); return -ENOMEM; } + gfs2_final_workqueue = alloc_workqueue("final_workqueue", + WQ_MEM_RECLAIM | WQ_HIGHPRI | + WQ_FREEZABLE, 0); + if (IS_ERR(gfs2_final_workqueue)) { + destroy_workqueue(glock_workqueue); + destroy_workqueue(gfs2_delete_workqueue); + return PTR_ERR(gfs2_final_workqueue); + } register_shrinker(&glock_shrinker); @@ -1784,6 +1807,7 @@ void gfs2_glock_exit(void) rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); destroy_workqueue(gfs2_delete_workqueue); + destroy_workqueue(gfs2_final_workqueue); } static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a6a3389..1b63fbc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -361,6 +361,7 @@ struct gfs2_glock { atomic_t gl_ail_count; atomic_t gl_revokes; struct delayed_work gl_work; + struct work_struct gl_final; union { /* For inode and iopen glocks only */ struct work_struct gl_delete; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 0357862..4232368 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -126,6 +126,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (unlikely(error)) goto fail; + + flush_work(&ip->i_gl->gl_final); ip->i_gl->gl_object = ip; error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); @@ -189,6 +191,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, if (error) return ERR_PTR(error); + flush_work(&i_gh.gh_gl->gl_final); error = gfs2_check_blk_type(sdp, no_addr, blktype); if (error) goto fail; @@ -681,6 +684,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry 
*dentry, if (error) goto fail_free_inode; + flush_work(&ip->i_gl->gl_final); ip->i_gl->gl_object = ip; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); if (error)