struct rb_root rd_rstree; /* multi-block reservation tree */
};
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4c7069b8f3c1..a9e858e01c97 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
+ rgrp_lock_local(rgd);
if (bi->bi_clone == NULL)
- return;
+ goto out;
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1,
NULL);
memcpy(bi->bi_clone + bi->bi_offset,
@@ -85,6 +86,8 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
clear_bit(GBF_FULL, &bi->bi_flags);
rgd->rd_free_clone = rgd->rd_free;
rgd->rd_extfail_pt = rgd->rd_free;
+out:
+ rgrp_unlock_local(rgd);
}
/**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8a6b41f3667c..a89be4782c15 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -702,10 +702,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
rgd = rs->rs_rgd;
if (rgd) {
- spin_lock(&rgd->rd_rsspin);
+ rgrp_lock_local(rgd);
__rs_deltree(rs);
BUG_ON(rs->rs_free);
- spin_unlock(&rgd->rd_rsspin);
+ rgrp_unlock_local(rgd);
}
}
@@ -737,12 +737,12 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
struct rb_node *n;
struct gfs2_blkreserv *rs;
- spin_lock(&rgd->rd_rsspin);
+ rgrp_lock_local(rgd);
while ((n = rb_first(&rgd->rd_rstree))) {
rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
__rs_deltree(rs);
}
- spin_unlock(&rgd->rd_rsspin);
+ rgrp_unlock_local(rgd);
}
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
@@ -948,7 +948,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
- spin_lock_init(&rgd->rd_rsspin);
+ mutex_init(&rgd->rd_lock);
error = compute_bitstructs(rgd);
if (error)
@@ -1469,9 +1469,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
+ rgrp_lock_local(rgd);
ret = gfs2_rgrp_send_discards(sdp,
rgd->rd_data0, NULL, bi, minlen,
&amt);
+ rgrp_unlock_local(rgd);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@@ -1483,9 +1485,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
bh = rgd->rd_bits[0].bi_bh;
+ rgrp_lock_local(rgd);
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
+ rgrp_unlock_local(rgd);
gfs2_trans_end(sdp);
}
}
@@ -1519,7 +1523,6 @@ static void rs_insert(struct gfs2_inode *ip)
BUG_ON(gfs2_rs_active(rs));
- spin_lock(&rgd->rd_rsspin);
newn = &rgd->rd_rstree.rb_node;
while (*newn) {
struct gfs2_blkreserv *cur =
@@ -1532,7 +1535,6 @@ static void rs_insert(struct gfs2_inode *ip)
else if (rc < 0)
newn = &((*newn)->rb_left);
else {
- spin_unlock(&rgd->rd_rsspin);
WARN_ON(1);
return;
}
@@ -1540,7 +1542,6 @@ static void rs_insert(struct gfs2_inode *ip)
rb_link_node(&rs->rs_node, parent, newn);
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
- spin_unlock(&rgd->rd_rsspin);
trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
@@ -1632,7 +1633,6 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
struct rb_node *n;
int rc;
- spin_lock(&rgd->rd_rsspin);
n = rgd->rd_rstree.rb_node;
while (n) {
rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
@@ -1655,7 +1655,6 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
}
}
- spin_unlock(&rgd->rd_rsspin);
return block;
}
@@ -1857,7 +1856,22 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
while (1) {
+ /*
+ * We must be careful to avoid deadlock here:
+ * All transactions expect: sd_log_flush_lock followed by rgrp
+ * ex (if needed), but try_rgrp_unlink takes sd_log_flush_lock
+ * outside a transaction and therefore must not have the rgrp
+ * ex already held. To avoid deadlock, we drop the rgrp ex lock
+ * before taking the log_flush_lock, then reacquire it to
+ * protect our call to gfs2_rbm_find.
+ *
+ * Also note that rgrp_unlock_local must come AFTER the caller does
+ * gfs2_rs_deltree because rgrp ex needs to be held before
+ * making reservations.
+ */
+ rgrp_unlock_local(rgd);
down_write(&sdp->sd_log_flush_lock);
+ rgrp_lock_local(rgd);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
true);
up_write(&sdp->sd_log_flush_lock);
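
The ordering rule spelled out in the comment above (sd_log_flush_lock first, then the rgrp lock) is the classic drop-and-reacquire pattern for avoiding lock-order inversion. For reference, here is a minimal user-space sketch of the same pattern; outer_lock, inner_lock, txn_thread and scan_thread are illustrative stand-ins for sd_log_flush_lock, rd_lock and their users, not GFS2 code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER; /* plays sd_log_flush_lock */
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER; /* plays rd_lock */

/* Transaction path: takes both locks in canonical order. */
static void *txn_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&outer_lock);
	pthread_mutex_lock(&inner_lock);
	puts("txn: outer -> inner");
	pthread_mutex_unlock(&inner_lock);
	pthread_mutex_unlock(&outer_lock);
	return NULL;
}

/*
 * Scanner path, like try_rgrp_unlink: it already holds the inner lock
 * but needs the outer one, so it drops the inner lock first and
 * reacquires it in canonical order instead of deadlocking against
 * txn_thread.
 */
static void *scan_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&inner_lock);
	/* ... work under the inner lock ... */
	pthread_mutex_unlock(&inner_lock);	/* drop before taking outer */
	pthread_mutex_lock(&outer_lock);
	pthread_mutex_lock(&inner_lock);	/* reacquire in order */
	puts("scan: inner reacquired under outer");
	pthread_mutex_unlock(&inner_lock);
	pthread_mutex_unlock(&outer_lock);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, txn_thread, NULL);
	pthread_create(&t2, NULL, scan_thread, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}
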
@@ -2055,7 +2069,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = &ip->i_res;
- int error = 0, rg_locked, flags = 0;
+ int error = 0, flags = 0;
+ bool rg_locked;
u64 last_unlinked = NO_BLOCK;
int loops = 0;
u32 free_blocks, skip = 0;
@@ -2081,10 +2096,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
return -EBADSLT;
while (loops < 3) {
- rg_locked = 1;
-
- if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
- rg_locked = 0;
+ rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
+ if (rg_locked) {
+ rgrp_lock_local(rs->rs_rgd);
+ } else {
if (skip && skip--)
goto next_rgrp;
if (!gfs2_rs_active(rs)) {
@@ -2101,12 +2116,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
&ip->i_rgd_gh);
if (unlikely(error))
return error;
+ rgrp_lock_local(rs->rs_rgd);
if (!gfs2_rs_active(rs) && (loops < 2) &&
gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
error = update_rgrp_lvb(rs->rs_rgd);
if (unlikely(error)) {
+ rgrp_unlock_local(rs->rs_rgd);
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
return error;
}
@@ -2140,9 +2157,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
rs->rs_reserved = ap->target;
if (rs->rs_reserved > free_blocks)
rs->rs_reserved = free_blocks;
- spin_lock(&rs->rs_rgd->rd_rsspin);
rgd->rd_reserved += rs->rs_reserved;
- spin_unlock(&rs->rs_rgd->rd_rsspin);
+ rgrp_unlock_local(rs->rs_rgd);
return 0;
}
check_rgrp:
@@ -2151,6 +2167,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
ip->i_no_addr);
skip_rgrp:
+ rgrp_unlock_local(rs->rs_rgd);
+
/* Drop reservation, if we couldn't use reserved rgrp */
if (gfs2_rs_active(rs))
gfs2_rs_deltree(rs);
@@ -2199,10 +2217,10 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
if (rs->rs_reserved) {
struct gfs2_rgrpd *rgd = rs->rs_rgd;
- spin_lock(&rgd->rd_rsspin);
+ rgrp_lock_local(rgd);
BUG_ON(rgd->rd_reserved < rs->rs_reserved);
rgd->rd_reserved -= rs->rs_reserved;
- spin_unlock(&rs->rs_rgd->rd_rsspin);
+ rgrp_unlock_local(rgd);
rs->rs_reserved = 0;
}
if (gfs2_holder_initialized(&ip->i_rgd_gh))
@@ -2304,12 +2322,12 @@ void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
}
- spin_lock(&rgd->rd_rsspin);
+ rgrp_lock_local(rgd);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
dump_rs(seq, trs);
}
- spin_unlock(&rgd->rd_rsspin);
+ rgrp_unlock_local(rgd);
}
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
@@ -2339,7 +2357,6 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
struct gfs2_blkreserv *rs = &ip->i_res;
struct gfs2_rgrpd *rgd = rbm->rgd;
- spin_lock(&rgd->rd_rsspin);
BUG_ON(rs->rs_reserved < len);
rgd->rd_reserved -= len;
rs->rs_reserved -= len;
@@ -2355,15 +2372,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
trace_gfs2_rs(rs, TRACE_RS_CLAIM);
if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
rs->rs_free)
- goto out;
+ return;
/* We used up our block reservation, so we should
reserve more blocks next time. */
atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
}
__rs_deltree(rs);
}
-out:
- spin_unlock(&rgd->rd_rsspin);
}
/**
@@ -2416,6 +2431,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
BUG_ON(ip->i_res.rs_reserved < *nblocks);
+ rgrp_lock_local(rbm.rgd);
gfs2_set_alloc_start(&rbm, ip, dinode);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
@@ -2460,6 +2476,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
}
rbm.rgd->rd_free -= *nblocks;
+ rbm.rgd->rd_free_clone -= *nblocks;
if (dinode) {
rbm.rgd->rd_dinodes++;
*generation = rbm.rgd->rd_igeneration++;
@@ -2469,6 +2486,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rbm.rgd);
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
@@ -2476,13 +2494,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
- rbm.rgd->rd_free_clone -= *nblocks;
trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
rgrp_error:
+ rgrp_unlock_local(rbm.rgd);
gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
@@ -2502,12 +2520,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rgd);
/* Directories keep their data in the metadata address space */
if (meta || ip->i_depth)
@@ -2543,17 +2563,20 @@ void gfs2_unlink_di(struct inode *inode)
rgd = gfs2_blk2rgrpd(sdp, blkno, true);
if (!rgd)
return;
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
+ rgrp_unlock_local(rgd);
}
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
if (!rgd->rd_dinodes)
gfs2_consist_rgrpd(rgd);
@@ -2562,6 +2585,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rgd);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
gfs2_statfs_change(sdp, 0, +1, -1);
@@ -2576,6 +2600,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
* @no_addr: The block number to check
* @type: The block type we are looking for
*
+ * The inode glock of @no_addr must be held. The @type to check for is either
+ * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
+ * or GFS2_BLKST_USED would make no sense.
+ *
* Returns: 0 if the block type matches the expected type
* -ESTALE if it doesn't match
* or -ve errno if something went wrong while checking
@@ -2601,6 +2629,12 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
if (WARN_ON_ONCE(error))
goto fail;
+ /*
+ * No need to take the local resource group lock here; the inode glock
+ * of @no_addr provides the necessary synchronization in case the block
+ * is an inode. (In case the block is not an inode, the block type
+ * will not match the @type we are looking for.)
+ */
if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
@@ -2723,3 +2757,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
}
}
+void rgrp_lock_local(struct gfs2_rgrpd *rgd)
+{
+ BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
+ !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
+ mutex_lock(&rgd->rd_lock);
+}
+
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
+{
+ mutex_unlock(&rgd->rd_lock);
+}
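
For readers unfamiliar with the convention: rgrp_lock_local asserts that the rgrp glock is already held exclusively (except during recovery, when SDF_NORECOVERY is set) before taking the per-rgrp mutex, so the glock acts as the outer lock and rd_lock as the inner one. A user-space analog of this assert-then-lock idiom follows; struct rgrp, glock_held_excl and rd_free here are stand-ins, not the real GFS2 types:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

/* Stand-ins for the real GFS2 types. */
struct rgrp {
	bool glock_held_excl;		/* models gfs2_glock_is_held_excl() */
	pthread_mutex_t lock;		/* models rgd->rd_lock */
	unsigned int rd_free;		/* example of state the lock protects */
};

/* Check the outer (glock) protection first, then take the inner mutex. */
static void rgrp_lock_local(struct rgrp *rgd)
{
	assert(rgd->glock_held_excl);	/* models the BUG_ON() above */
	pthread_mutex_lock(&rgd->lock);
}

static void rgrp_unlock_local(struct rgrp *rgd)
{
	pthread_mutex_unlock(&rgd->lock);
}

int main(void)
{
	struct rgrp rgd = {
		.glock_held_excl = true,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.rd_free = 10,
	};

	rgrp_lock_local(&rgd);
	rgd.rd_free--;			/* mutate only under the local lock */
	rgrp_unlock_local(&rgd);
	return 0;
}
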
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b596c3d17988..33e52dab76ef 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -92,4 +92,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
}
extern void check_and_update_goal(struct gfs2_inode *ip);
+
+extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
+extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
+
#endif /* __RGRP_DOT_H__ */
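
A related detail from the rgrp.c hunks above: gfs2_alloc_blocks now decrements rd_free_clone right next to rd_free, inside the section bracketed by rgrp_lock_local/rgrp_unlock_local, rather than after the unlock. A minimal sketch of the invariant this preserves, both counters moving together within one critical section, with hypothetical names:

#include <pthread.h>
#include <stdio.h>

/* Two counters that must always be updated together, like rd_free and
 * rd_free_clone in gfs2_alloc_blocks. */
struct counters {
	pthread_mutex_t lock;
	unsigned long free;
	unsigned long free_clone;
};

static void alloc_blocks(struct counters *c, unsigned long n)
{
	pthread_mutex_lock(&c->lock);
	/* Any other thread holding the lock sees both counters updated,
	 * or neither, never one without the other. */
	c->free -= n;
	c->free_clone -= n;
	pthread_mutex_unlock(&c->lock);
}

int main(void)
{
	struct counters c = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.free = 100,
		.free_clone = 100,
	};

	alloc_blocks(&c, 4);
	printf("free=%lu free_clone=%lu\n", c.free, c.free_clone);
	return 0;
}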