Before this patch, function gfs2_inplace_reserve did a bunch of pre-checks based on loop counters and whether or not we have a pre-existing reservation we must use. Then it called function gfs2_rgrp_congested. But it only makes sense to check the rgrp glock statistics if we're using a real locking protocol like dlm. For lock_nolock, this is just a waste of time and can give false positives.
This patch breaks function gfs2_rgrp_congested into its two cases: inter-node and intra-node congestion. For intra-node congestion, the logic is just simplified from what gfs2_inplace_reserve had already done. The checks for inter-node congestion are moved to a separate gfs2_rgrp_congested_dlm function. So this basically stubs in a framework for doing more in-depth checks for intra-node congestion, in a later patch. Function fast_to_acquire is also moved to be closer to function gfs2_rgrp_congested. Signed-off-by: Bob Peterson <[email protected]> --- fs/gfs2/rgrp.c | 128 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 43 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9d5f35b01c1d..641bb4a8cf5b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1819,37 +1819,6 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } -/** - * fast_to_acquire - determine if a resource group will be fast to acquire - * - * 1. If this is one of our preferred rgrps, it should be quicker to acquire, - * because we tried to set ourselves up as dlm lock master. - * 2. If the glock is unlocked, consider it slow because it will take time for - * the dlm and the glock state machine to transition it to a locked state. - * 3. If there are glock holder records queued to the glock, consider it slow - * because this process will need to be queued up behind another process: - * Our request can't be enqueued until the other is dequeued. - * 4. If the rgrp glock is being demoted, consider it slow because the glock - * will need to be demoted, possibly used on another node, the promoted, - * all of which will take time. 
- * - */ -static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_glock *gl = rgd->rd_gl; - - if (rgd->rd_flags & GFS2_RDF_PREFERRED) - return true; - if (gl->gl_state == LM_ST_UNLOCKED) - return false; - if (!list_empty(&gl->gl_holders)) - return false; - if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) || - test_bit(GLF_DEMOTE, &gl->gl_flags)) - return false; - return true; -} - /** * gfs2_rgrp_used_recently - check if a rgrp has been used recently * @rgd: The rgrp to test @@ -1866,7 +1835,7 @@ static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd, } /** - * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested + * gfs2_rgrp_congested_dlm - Use stats to figure out if an rgrp is congested * @rgd: The rgrp in question * @loops: An indication of how picky we can be (0=very, 1=less so) * @@ -1892,7 +1861,7 @@ static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd, * Returns: A boolean verdict on the congestion status */ -static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) +static bool gfs2_rgrp_congested_dlm(const struct gfs2_rgrpd *rgd, int loops) { const struct gfs2_glock *gl = rgd->rd_gl; const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -1904,9 +1873,6 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) u64 var; int cpu, nonzero = 0; - if (loops == 0 && !fast_to_acquire(rgd)) - return true; - /* If it hasn't been used recently we can't judge the statistics, so assume it's not congested. */ if (!gfs2_rgrp_used_recently(rgd, HZ)) @@ -1946,6 +1912,86 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) return ((srttb_diff < 0) && (sqr_diff > var)); } +/** + * fast_to_acquire - determine if a resource group will be fast to acquire + * + * 1. If this is one of our preferred rgrps, it should be quicker to acquire, + * because we tried to set ourselves up as dlm lock master. + * 2. 
If the glock is unlocked, consider it slow because it will take time for + * the dlm and the glock state machine to transition it to a locked state. + * 3. If there are glock holder records queued to the glock, consider it slow + * because this process will need to be queued up behind another process: + * Our request can't be enqueued until the other is dequeued. + * 4. If the rgrp glock is being demoted, consider it slow because the glock + * will need to be demoted, possibly used on another node, then promoted, + * all of which will take time. + * + */ +static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd) +{ + struct gfs2_glock *gl = rgd->rd_gl; + + if (rgd->rd_flags & GFS2_RDF_PREFERRED) + return true; + if (gl->gl_state == LM_ST_UNLOCKED) + return false; + if (!list_empty(&gl->gl_holders)) + return false; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) || + test_bit(GLF_DEMOTE, &gl->gl_flags)) + return false; + return true; +} + +/** + * gfs2_rgrp_congested - decide whether a rgrp glock is congested + * @rs: The reservation in question + * @loops: An indication of how picky we can be (0=very, 1=less so) + * + * There are two kinds of congestion: inter-node and intra-node. + * + * Inter-node congestion is where multiple nodes all want to allocate blocks + * inside the same rgrp, which means they need to trade the rgrp glock back + * and forth, which is a slow process. To mitigate this, we use glock + * statistics to predict whether the glock is historically fast to acquire. + * + * Intra-node congestion is where you have multiple processes on the same + * node, all trying to allocate blocks in the same rgrp. There's nothing wrong + * with doing so, but each process needs to wait for the other to release the + * rgrp glock before it may proceed. + * + * If we're not using inter-node locking (dlm) it doesn't make sense to check + * the glock statistics. 
Instead, we do some simple checks based on how + * desperate we are to get blocks (the number of loops). + * + * We know the number of loops we've been around, so we know how desperate we + * are to find something. On first loop, call it congested if anyone else has + * a block reservation. On second loop, call it congested if it's not fast to + * acquire. + */ +static bool gfs2_rgrp_congested(const struct gfs2_blkreserv *rs, int loops) +{ + const struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; + + /* We already have a reservation, we need to use it regardless */ + if (gfs2_rs_active(rs)) + return false; + + /* If we've rejected all the rgrps a few times, we can no longer worry + about whether the rgrp is congested. Fill in blocks where we can. */ + if (loops >= 2) + return false; + + if (loops == 0 && !fast_to_acquire(rgd)) + return true; + + /* Check for inter-node congestion */ + if (rgd->rd_sbd->sd_lockstruct.ls_ops->lm_lock) /* lock_dlm */ + return gfs2_rgrp_congested_dlm(rgd, loops); + + return false; +} + static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -2019,18 +2065,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rg_locked = 0; if (skip && skip--) goto next_rgrp; - if (!gfs2_rs_active(rs)) { - if ((loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) - goto next_rgrp; - } + if (gfs2_rgrp_congested(rs, loops)) + goto next_rgrp; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); if (unlikely(error)) return error; - if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops) + if (gfs2_rgrp_congested(rs, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); -- 2.14.3
