Hi,

This patch introduces a new glock workqueue, gfs2_glock_final. The
workqueue merely does work to call dlm's unlock.
This prevents gfs2_evict_inode from calling dlm directly which
might block, waiting for DLM to unlock, which may be waiting for
something like a fence operation. By moving it to its own work queue,
the final put happens later, which allows the shrinker to continue,
and free memory, avoiding a livelock.

Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 795c2f3..9d0f3d5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct 
gfs2_holder *gh, unsigned int
 
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
+static struct workqueue_struct *gfs2_final_workqueue;
 struct workqueue_struct *gfs2_delete_workqueue;
 static LIST_HEAD(lru_list);
 static atomic_t lru_count = ATOMIC_INIT(0);
@@ -152,6 +153,20 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock 
*gl)
        spin_unlock(&lru_lock);
 }
 
+/* The purpose of this function is to tell dlm when a glock is not needed
+ * We can't do this directly from gfs2_glock_put because dlm may block while
+ * waiting for a fence operation to complete. But the fence may block on
+ * memory allocation, which may block on the shrinker, which may block on
+ * the evict code. So the buck stops here.
+ */
+static void final_work_func(struct work_struct *work)
+{
+       struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_final);
+       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+       sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+}
+
 /**
  * gfs2_glock_put() - Decrement reference count on glock
  * @gl: The glock to put
@@ -160,7 +175,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock 
*gl)
 
 void gfs2_glock_put(struct gfs2_glock *gl)
 {
-       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct address_space *mapping = gfs2_glock2aspace(gl);
 
        if (lockref_put_or_lock(&gl->gl_lockref))
@@ -174,7 +188,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
        GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
        GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
        trace_gfs2_glock_put(gl);
-       sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+       BUG_ON(queue_work(gfs2_final_workqueue, &gl->gl_final) == 0);
 }
 
 /**
@@ -700,6 +714,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
        gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
        INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
        INIT_WORK(&gl->gl_delete, delete_work_func);
+       INIT_WORK(&gl->gl_final, final_work_func);
 
        mapping = gfs2_glock2aspace(gl);
        if (mapping) {
@@ -1772,6 +1787,14 @@ int __init gfs2_glock_init(void)
                rhashtable_destroy(&gl_hash_table);
                return -ENOMEM;
        }
+       gfs2_final_workqueue = alloc_workqueue("final_workqueue",
+                                              WQ_MEM_RECLAIM | WQ_HIGHPRI |
+                                              WQ_FREEZABLE, 0);
+       if (IS_ERR(gfs2_final_workqueue)) {
+               destroy_workqueue(glock_workqueue);
+               destroy_workqueue(gfs2_delete_workqueue);
+               return PTR_ERR(gfs2_final_workqueue);
+       }
 
        register_shrinker(&glock_shrinker);
 
@@ -1784,6 +1807,7 @@ void gfs2_glock_exit(void)
        rhashtable_destroy(&gl_hash_table);
        destroy_workqueue(glock_workqueue);
        destroy_workqueue(gfs2_delete_workqueue);
+       destroy_workqueue(gfs2_final_workqueue);
 }
 
 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a6a3389..1b63fbc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -361,6 +361,7 @@ struct gfs2_glock {
        atomic_t gl_ail_count;
        atomic_t gl_revokes;
        struct delayed_work gl_work;
+       struct work_struct gl_final;
        union {
                /* For inode and iopen glocks only */
                struct work_struct gl_delete;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 0357862..4232368 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -126,6 +126,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, 
unsigned int type,
                error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, 
&ip->i_gl);
                if (unlikely(error))
                        goto fail;
+
+               flush_work(&ip->i_gl->gl_final);
                ip->i_gl->gl_object = ip;
 
                error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, 
&io_gl);
@@ -189,6 +191,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 
no_addr,
        if (error)
                return ERR_PTR(error);
 
+       flush_work(&i_gh.gh_gl->gl_final);
        error = gfs2_check_blk_type(sdp, no_addr, blktype);
        if (error)
                goto fail;
@@ -681,6 +684,7 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
        if (error)
                goto fail_free_inode;
 
+       flush_work(&ip->i_gl->gl_final);
        ip->i_gl->gl_object = ip;
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
        if (error)

Reply via email to