struct rb_root rd_rstree; /* multi-block reservation tree */
};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb815638..3176cadfc309 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1121,8 +1121,8 @@ static int gfs2_unlink(struct inode *dir, struct dentry
*dentry)
if (!rgd)
goto out_inodes;
- gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_EX,
+ ghs + 2);
error = gfs2_glock_nq(ghs); /* parent */
if (error)
@@ -1409,7 +1409,8 @@ static int gfs2_rename(struct inode *odir, struct dentry
*odentry,
*/
nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
if (nrgd)
- gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs +
num_gh++);
+ gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, ghs + num_gh++);
}
for (x = 0; x < num_gh; x++) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b683917a27e..7891229b836d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -904,6 +904,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
spin_lock_init(&rgd->rd_rsspin);
+ sema_init(&rgd->rd_sem, 1);
error = compute_bitstructs(rgd);
if (error)
@@ -1344,6 +1345,25 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64
offset,
return -EIO;
}
+/**
+ * rgrp_lock - gain exclusive access to an rgrp
+ * @rgd: the resource group descriptor
+ *
+ * This function waits for exclusive access to an rgrp whose glock is held in
+ * EX, but shared via the LM_FLAG_NODE_EX holder flag.
+ */
+static void rgrp_lock(struct gfs2_rgrpd *rgd)
+{
+ BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl));
+ down(&rgd->rd_sem);
+}
+
+static void rgrp_unlock(struct gfs2_rgrpd *rgd)
+{
+ BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl));
+ up(&rgd->rd_sem);
+}
+
/**
* gfs2_fitrim - Generate discard requests for unused bits of the filesystem
* @filp: Any file on the filesystem
@@ -1399,7 +1419,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
while (1) {
- ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &gh);
if (ret)
goto out;
@@ -1420,11 +1441,13 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Mark rgrp as having been trimmed */
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
+ rgrp_lock(rgd);
bh = rgd->rd_bits[0].bi_bh;
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
+ rgrp_unlock(rgd);
gfs2_trans_end(sdp);
}
}
@@ -1768,6 +1791,16 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
u32 *minext,
*
* Returns: 0 if no error
* The inode, if one has been found, in inode.
+ * We must be careful to avoid deadlock here:
+ *
+ * All transactions expect: sd_log_flush_lock followed by rgrp ex (if needed),
+ * but try_rgrp_unlink takes sd_log_flush_lock outside a transaction and
+ * therefore must not have the rgrp ex already held. To avoid deadlock, we
+ * drop the rgrp ex lock before taking the log_flush_lock, then reacquire it
+ * to protect our call to gfs2_rbm_find.
+ *
+ * Also note that rgrp_unlock must come AFTER the caller does gfs2_rs_deltree
+ * because rgrp ex needs to be held before making reservations.
*/
static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
@@ -1781,7 +1814,12 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64
*last_unlinked, u64 skip
struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
while (1) {
+		/* As explained above, we need to drop the rgrp ex lock and
+		 * reacquire it after we take sd_log_flush_lock.
+		 */
+ rgrp_unlock(rgd);
down_write(&sdp->sd_log_flush_lock);
+ rgrp_lock(rgd);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
true);
up_write(&sdp->sd_log_flush_lock);
@@ -1980,7 +2018,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct
gfs2_alloc_parms *ap)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = &ip->i_res;
- int error = 0, rg_locked, flags = 0;
+ int error = 0, rg_locked, flags = LM_FLAG_NODE_EX;
u64 last_unlinked = NO_BLOCK;
int loops = 0;
u32 skip = 0;
@@ -1991,6 +2029,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct
gfs2_alloc_parms *ap)
return -EINVAL;
if (gfs2_rs_active(rs)) {
begin = rs->rs_rbm.rgd;
+ flags = LM_FLAG_NODE_EX;
} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
rs->rs_rbm.rgd = begin = ip->i_rgd;
} else {
@@ -2023,16 +2062,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct
gfs2_alloc_parms *ap)
&rs->rs_rgd_gh);
if (unlikely(error))
return error;
+ rgrp_lock(rs->rs_rbm.rgd);
if (!gfs2_rs_active(rs) && (loops < 2) &&
gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
error = update_rgrp_lvb(rs->rs_rbm.rgd);
if (unlikely(error)) {
+ rgrp_unlock(rs->rs_rbm.rgd);
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
return error;
}
}
+ } else {
+ rgrp_lock(rs->rs_rbm.rgd);
}
/* Skip unuseable resource groups */
@@ -2058,14 +2101,21 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct
gfs2_alloc_parms *ap)
rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
ip->i_rgd = rs->rs_rbm.rgd;
ap->allowed = ip->i_rgd->rd_free_clone;
+ rgrp_unlock(rs->rs_rbm.rgd);
return 0;
}
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
- if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
+ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) {
+ /* Drop reservation if we couldn't use reserved rgrp */
+ if (gfs2_rs_active(rs))
+ gfs2_rs_deltree(rs);
try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
ip->i_no_addr);
+ }
skip_rgrp:
+ rgrp_unlock(rs->rs_rbm.rgd);
+
/* Drop reservation, if we couldn't use reserved rgrp */
if (gfs2_rs_active(rs))
gfs2_rs_deltree(rs);
@@ -2169,7 +2219,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm,
bool dinode,
}
/**
- * rgblk_free - Change alloc state of given block(s)
+ * rgblk_free_and_lock - Change alloc state of given block(s) and lock rgrp ex
* @sdp: the filesystem
* @bstart: the start of a run of blocks to free
* @blen: the length of the block run (all must lie within ONE RG!)
@@ -2178,8 +2228,8 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm,
bool dinode,
* Returns: Resource group containing the block(s)
*/
-static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
- u32 blen, unsigned char new_state)
+static struct gfs2_rgrpd *rgblk_free_and_lock(struct gfs2_sbd *sdp, u64 bstart,
+ u32 blen, unsigned char new_state)
{
struct gfs2_rbm rbm;
struct gfs2_bitmap *bi, *bi_prev = NULL;
@@ -2192,6 +2242,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd
*sdp, u64 bstart,
}
gfs2_rbm_from_block(&rbm, bstart);
+ rgrp_lock(rbm.rgd);
while (blen--) {
bi = rbm_bi(&rbm);
if (bi != bi_prev) {
@@ -2341,6 +2392,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn,
unsigned int *nblocks,
int error;
gfs2_set_alloc_start(&rbm, ip, dinode);
+ rgrp_lock(rbm.rgd);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
if (error == -ENOSPC) {
@@ -2395,6 +2447,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn,
unsigned int *nblocks,
gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl,
rbm.rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock(rbm.rgd);
+
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
gfs2_trans_add_unrevoke(sdp, block, *nblocks);
@@ -2408,6 +2462,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn,
unsigned int *nblocks,
return 0;
rgrp_error:
+ rgrp_unlock(rbm.rgd);
gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
@@ -2426,7 +2481,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64
bstart, u32 blen, int meta)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
- rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
+ rgd = rgblk_free_and_lock(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
@@ -2435,6 +2490,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64
bstart, u32 blen, int meta)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock(rgd);
/* Directories keep their data in the metadata address space */
if (meta || ip->i_depth)
@@ -2465,7 +2521,7 @@ void gfs2_unlink_di(struct inode *inode)
struct gfs2_rgrpd *rgd;
u64 blkno = ip->i_no_addr;
- rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
+ rgd = rgblk_free_and_lock(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
@@ -2473,6 +2529,7 @@ void gfs2_unlink_di(struct inode *inode)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
update_rgrp_lvb_unlinked(rgd, 1);
+ rgrp_unlock(rgd);
}
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
@@ -2480,7 +2537,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct
gfs2_inode *ip)
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_rgrpd *tmp_rgd;
- tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+ tmp_rgd = rgblk_free_and_lock(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
if (!tmp_rgd)
return;
gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
@@ -2494,6 +2551,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct
gfs2_inode *ip)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
update_rgrp_lvb_unlinked(rgd, -1);
+ rgrp_unlock(rgd);
gfs2_statfs_change(sdp, 0, +1, -1);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
@@ -2522,7 +2580,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64
no_addr, unsigned int type)
if (!rgd)
goto fail;
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &rgd_gh);
if (error)
goto fail;
@@ -2601,16 +2660,15 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
*
*/
-void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
{
unsigned int x;
rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder),
GFP_NOFS | __GFP_NOFAIL);
for (x = 0; x < rlist->rl_rgrps; x++)
- gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
- state, 0,
- &rlist->rl_ghs[x]);
+ gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &rlist->rl_ghs[x]);
}
/**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index e90478e2f545..ca40cdcc4b7e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -68,7 +68,7 @@ struct gfs2_rgrp_list {
extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
u64 block);
-extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
+extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index cf5c7f3080d2..57f30f96b3b6 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1131,8 +1131,9 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct
gfs2_statfs_change_host
done = 0;
else if (rgd_next && !error) {
error = gfs2_glock_nq_init(rgd_next->rd_gl,
- LM_ST_SHARED,
- GL_ASYNC,
+ LM_ST_EXCLUSIVE,
+ GL_ASYNC|
+ LM_FLAG_NODE_EX,
gh);
rgd_next = gfs2_rgrpd_get_next(rgd_next);
done = 0;
@@ -1507,7 +1508,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
goto out_qs;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &gh);
if (error)
goto out_qs;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index f2bce1e0f6fb..6a698a74dcb5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -262,7 +262,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip,
struct buffer_head *bh,
return -EIO;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &rg_gh);
if (error)
return error;
@@ -1314,7 +1315,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
else
goto out;
- gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
+ gfs2_rlist_alloc(&rlist);
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
@@ -1397,7 +1398,8 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
return -EIO;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_EX, &gh);
if (error)
return error;