This patch adds a new specialized function to look up an inode
for the purposes of deleting it. Before, we used to call function
gfs2_lookup_by_inum, but the new function closes some timing
windows involving the iopen and inode glocks coming and going,
since they typically outlive their inodes.

Signed-off-by: Bob Peterson <[email protected]>
---
 fs/gfs2/glock.c |   9 ++--
 fs/gfs2/inode.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/inode.h |   2 +
 3 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 926652f..e5df66a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -569,16 +569,17 @@ static void delete_work_func(struct work_struct *work)
        struct gfs2_glock *gl = container_of(work, struct gfs2_glock, 
gl_delete);
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct gfs2_inode *ip;
-       struct inode *inode;
+       struct inode *inode = NULL;
        u64 no_addr = gl->gl_name.ln_number;
 
        ip = gl->gl_object;
        /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-
        if (ip)
                inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
-       else
-               inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, 
GFS2_BLKST_UNLINKED);
+
+       if (inode == NULL || IS_ERR(inode))
+               inode = gfs2_inode_lookup_for_del(sdp->sd_vfs, no_addr);
+
        if (inode && !IS_ERR(inode)) {
                d_prune_aliases(inode);
                iput(inode);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06c208b..1c57f7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -153,6 +153,139 @@ fail:
        return ERR_PTR(error);
 }
 
+/**
+ * gfs2_inode_lookup_for_del - Lookup an unlinked inode so we can delete it
+ * @sb: The super block
+ * @no_addr: The inode number
+ *
+ * Returns: A VFS inode, or an error
+ *
+ * We jump through some hoops here to avoid a special case in which the block
+ * has been freed and already reallocated for a different inode while after
+ * the iopen callback was received to signify a remote unlink that needs
+ * deleting. In that case, we don't want to return the inode to the caller
+ * to delete the inode. We also need to do an inode_refresh to ensure that
+ * whomever recreated the dinode gets a proper i_nlink count, otherwise the
+ * vfs might think it's unlinked and still needs deleting.
+ */
+struct inode *gfs2_inode_lookup_for_del(struct super_block *sb, u64 no_addr)
+{
+       struct inode *inode;
+       struct gfs2_inode *ip;
+       struct gfs2_glock *io_gl = NULL;
+       struct gfs2_holder i_gh;
+       int error, btype;
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+       struct address_space *mapping;
+       BUG_ON(no_addr == 0);
+       inode = iget_locked(sb, (unsigned long)no_addr);
+       ip = GFS2_I(inode);
+
+       if (!inode)
+               return ERR_PTR(-ENOBUFS);
+
+       if (!(inode->i_state & I_NEW)) {
+               BUG_ON(ip->i_no_addr != no_addr);
+               return inode;
+       }
+
+       ip->i_no_addr = no_addr;
+
+       error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE,
+                              &ip->i_gl);
+       if (unlikely(error))
+               goto fail;
+       /* If this is a brand new inode, but a recycled glock, we've got a
+          reference problem. In clear_inode it does an extra glock_put so the
+          next time we do clear_inode for this inode, we'll get in trouble.
+          So we hold the glock to bump the reference count. */
+       if (ip->i_gl->gl_flags != 0) /* New inode, recycled glock */
+               ip->i_gl->gl_lockref.count++;
+
+       ip->i_gl->gl_object = ip;
+
+       error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE,
+                              &io_gl);
+       if (unlikely(error))
+               goto fail_put;
+
+       ip->i_iopen_gh.gh_gl = NULL;
+       gfs2_glock_put(io_gl);
+       io_gl = NULL;
+
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
+       if (error)
+               goto fail_put;
+
+       /* Inode glock must be locked already */
+       btype = gfs2_get_blk_type(sdp, no_addr);
+       if (btype < 0) {
+               error = btype;
+               goto fail_refresh;
+       }
+       if (btype == GFS2_BLKST_FREE || btype == GFS2_BLKST_USED) {
+               error = -ESTALE;
+               goto fail_refresh;
+       }
+
+       /* At this point it's either UNLINKED or DINODE */
+
+       /* If there aren't any pages associated with it and it's a new inode,
+        * we shouldn't be messing with it, even to read in pages. We should
+        * just exit and let whomever's using it carry on. If we do inode
+        * refresh, we'll end up adding pages to the cache that we'd otherwise
+        * need to truncate with truncate_inode_page().
+        */
+       mapping = gfs2_glock2aspace(ip->i_gl);
+       if (mapping->nrpages == 0 && btype == GFS2_BLKST_DINODE) {
+               error = -ESTALE;
+               goto fail_refresh;
+       }
+       /* At this point, we've got a dinode with pages associated with it
+        * or it's unlinked. If it's unlinked, we need to do inode_refresh
+        * so that our put() will delete it normally through gfs2_delete_inode.
+        * If it has pages associated with it, we may have grabbed the glock
+        * while it was being created, so we need to refresh it before
+        * exiting.
+        */
+       error = gfs2_inode_refresh(GFS2_I(inode));
+       if (error)
+               goto fail_refresh;
+
+       if (btype == GFS2_BLKST_DINODE) {
+               /* Now we know this is a dinode (not unlinked), but we know
+                * there were already pages associated with it. So it's safe
+                * to exit:
+                */
+               error = -ESTALE;
+               goto fail_refresh;
+       }
+
+       gfs2_set_iop(inode);
+       unlock_new_inode(inode);
+       gfs2_glock_dq_uninit(&i_gh);
+       gfs2_glock_put(ip->i_gl);
+       return inode;
+
+fail_refresh:
+       ip->i_gl->gl_object = NULL; /* See note below */
+       gfs2_glock_dq_uninit(&i_gh);
+fail_put:
+       /* This setting of gl_object to NULL is done by the other lookup
+        * functions. But if it races with someone reusing the dinode, we
+        * don't want to mess them up. It seems necessary in order to prevent
+        * buffer_heads from being attached after the i_gh is acquired.
+        * But it seems like it has the potential to screw up people trying
+        * to re-use the glock for a new incarnation of the inode.
+        * For now, I'm going to move it inside the dq_uninit.
+        */
+       /*ip->i_gl->gl_object = NULL;*/
+       gfs2_glock_put(ip->i_gl);
+fail:
+       iget_failed(inode);
+       return ERR_PTR(error);
+}
+
 struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
                                  u64 *no_formal_ino, unsigned int blktype)
 {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 22c27a8..031e301 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -96,6 +96,8 @@ err:
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
                                       u64 no_addr, u64 no_formal_ino,
                                       int non_block);
+extern struct inode *gfs2_inode_lookup_for_del(struct super_block *sb,
+                                              u64 no_addr);
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
                                         u64 *no_formal_ino,
                                         unsigned int blktype);
-- 
2.4.3

Reply via email to