From: Bob Peterson <rpete...@redhat.com> Prepare for treating resource group glocks as exclusive among nodes but shared among all tasks running on a node: introduce another layer of node-specific locking that the local tasks can use to coordinate their accesses.
Signed-off-by: Andreas Gruenbacher <agrue...@redhat.com> --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 5 +++- fs/gfs2/rgrp.c | 74 ++++++++++++++++++++++++++++++++++++++++++------ fs/gfs2/rgrp.h | 4 +++ 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ca25043fc26df..feba57a2a6bab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -23,6 +23,7 @@ #include <linux/percpu.h> #include <linux/lockref.h> #include <linux/rhashtable.h> +#include <linux/mutex.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -121,6 +122,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ + struct mutex rd_mutex; struct rb_root rd_rstree; /* multi-block reservation tree */ }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4c7069b8f3c1d..a9e858e01c97f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; + rgrp_lock_local(rgd); if (bi->bi_clone == NULL) - return; + goto out; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); memcpy(bi->bi_clone + bi->bi_offset, @@ -85,6 +86,8 @@ static void maybe_release_space(struct gfs2_bufdata *bd) clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; rgd->rd_extfail_pt = rgd->rd_free; +out: + rgrp_unlock_local(rgd); } /** diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1f427459a584d..95ecd81e61e70 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -952,6 +952,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); + mutex_init(&rgd->rd_mutex); error = compute_bitstructs(rgd); if (error) @@ -1472,9 +1473,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1486,9 +1489,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -1864,7 +1869,22 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { + /* + * We must be careful to avoid deadlock here: + * All transactions expect: sd_log_flush_lock followed by rgrp + * ex (if needed), but try_rgrp_unlink takes sd_log_flush_lock + * outside a transaction and therefore must not have the rgrp + * ex already held. To avoid deadlock, we drop the rgrp ex lock + * before taking the log_flush_lock, then reacquire it to + * protect our call to gfs2_rbm_find. + * + * Also note that rgrp_unlock_local must come AFTER the caller does + * gfs2_rs_deltree because rgrp ex needs to be held before + * making reservations. + */ + rgrp_unlock_local(rgd); down_write(&sdp->sd_log_flush_lock); + rgrp_lock_local(rgd); error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, true); up_write(&sdp->sd_log_flush_lock); @@ -2062,7 +2082,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = 0; + bool rg_locked; u64 last_unlinked = NO_BLOCK; int loops = 0; u32 free_blocks, skip = 0; @@ -2088,10 +2109,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EBADSLT; while (loops < 3) { - rg_locked = 1; - - if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { @@ -2108,12 +2129,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2147,9 +2170,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rs->rs_reserved = ap->target; if (rs->rs_reserved > free_blocks) rs->rs_reserved = free_blocks; - spin_lock(&rs->rs_rgd->rd_rsspin); + spin_lock(&rgd->rd_rsspin); rgd->rd_reserved += rs->rs_reserved; - spin_unlock(&rs->rs_rgd->rd_rsspin); + spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); return 0; } check_rgrp: @@ -2158,6 +2182,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2209,7 +2235,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) spin_lock(&rgd->rd_rsspin); BUG_ON(rgd->rd_reserved < rs->rs_reserved); rgd->rd_reserved -= rs->rs_reserved; - spin_unlock(&rs->rs_rgd->rd_rsspin); + spin_unlock(&rgd->rd_rsspin); rs->rs_reserved = 0; } if (gfs2_holder_initialized(&ip->i_rgd_gh)) @@ -2300,6 +2326,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) if (rgd == NULL) return; + spin_lock(&rgd->rd_rsspin); gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, @@ -2312,7 +2339,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs); @@ -2439,6 +2465,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); @@ -2488,6 +2515,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2501,6 +2529,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2520,12 +2549,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -2561,17 +2592,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2580,6 +2614,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); @@ -2594,6 +2629,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2619,6 +2658,12 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) if (WARN_ON_ONCE(error)) goto fail; + /* + * No need to take the local resource group lock here; the inode glock + * of @no_addr provides the necessary synchronization in case the block + * is an inode. (In case the block is not an inode, the block type + * will not match the @type we are looking for.) + */ if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; @@ -2741,3 +2786,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && + !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b596c3d179888..33e52dab76efa 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -92,4 +92,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) } extern void check_and_update_goal(struct gfs2_inode *ip); + +extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); +extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); + #endif /* __RGRP_DOT_H__ */ -- 2.19.1.546.g028f9c799.dirty