Before this patch, function gfs2_inplace_reserve did a bunch of pre-checks based on loop counters and whether or not we have a pre-existing reservation we must use. Then it called function gfs2_rgrp_congested. But it only makes sense to check the rgrp glock statistics if we're using a real locking protocol like dlm. For lock_nolock, this is just a waste of time and can give false positives.
This patch breaks function gfs2_rgrp_congested into its two cases: inter-node and intra-node congestion. For intra-node congestion, the logic is just simplified from what gfs2_inplace_reserve had already done. The checks for inter-node congestion are moved to a separate gfs2_rgrp_congested_dlm function. So this basically stubs in a framework for doing more in-depth checks for intra-node congestion, in a later patch. Function fast_to_acquire is also moved to be closer to function gfs2_rgrp_congested. Signed-off-by: Bob Peterson <[email protected]> --- fs/gfs2/rgrp.c | 128 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 43 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9d5f35b01c1d..641bb4a8cf5b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1819,37 +1819,6 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } -/** - * fast_to_acquire - determine if a resource group will be fast to acquire - * - * 1. If this is one of our preferred rgrps, it should be quicker to acquire, - * because we tried to set ourselves up as dlm lock master. - * 2. If the glock is unlocked, consider it slow because it will take time for - * the dlm and the glock state machine to transition it to a locked state. - * 3. If there are glock holder records queued to the glock, consider it slow - * because this process will need to be queued up behind another process: - * Our request can't be enqueued until the other is dequeued. - * 4. If the rgrp glock is being demoted, consider it slow because the glock - * will need to be demoted, possibly used on another node, the promoted, - * all of which will take time. 
- * - */ -static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_glock *gl = rgd->rd_gl; - - if (rgd->rd_flags & GFS2_RDF_PREFERRED) - return true; - if (gl->gl_state == LM_ST_UNLOCKED) - return false; - if (!list_empty(&gl->gl_holders)) - return false; - if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) || - test_bit(GLF_DEMOTE, &gl->gl_flags)) - return false; - return true; -} - /** * gfs2_rgrp_used_recently - check if a rgrp has been used recently * @rgd: The rgrp to test @@ -1866,7 +1835,7 @@ static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd, } /** - * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested + * gfs2_rgrp_congested_dlm - Use stats to figure out if an rgrp is congested * @rgd: The rgrp in question * @loops: An indication of how picky we can be (0=very, 1=less so) * @@ -1892,7 +1861,7 @@ static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd, * Returns: A boolean verdict on the congestion status */ -static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) +static bool gfs2_rgrp_congested_dlm(const struct gfs2_rgrpd *rgd, int loops) { const struct gfs2_glock *gl = rgd->rd_gl; const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -1904,9 +1873,6 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) u64 var; int cpu, nonzero = 0; - if (loops == 0 && !fast_to_acquire(rgd)) - return true; - /* If it hasn't been used recently we can't judge the statistics, so assume it's not congested. */ if (!gfs2_rgrp_used_recently(rgd, HZ)) @@ -1946,6 +1912,86 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) return ((srttb_diff < 0) && (sqr_diff > var)); } +/** + * fast_to_acquire - determine if a resource group will be fast to acquire + * + * 1. If this is one of our preferred rgrps, it should be quicker to acquire, + * because we tried to set ourselves up as dlm lock master. + * 2. 
If the glock is unlocked, consider it slow because it will take time for + * the dlm and the glock state machine to transition it to a locked state. + * 3. If there are glock holder records queued to the glock, consider it slow + * because this process will need to be queued up behind another process: + * Our request can't be enqueued until the other is dequeued. + * 4. If the rgrp glock is being demoted, consider it slow because the glock + * will need to be demoted, possibly used on another node, then promoted, + * all of which will take time. + * + */ +static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd) +{ + struct gfs2_glock *gl = rgd->rd_gl; + + if (rgd->rd_flags & GFS2_RDF_PREFERRED) + return true; + if (gl->gl_state == LM_ST_UNLOCKED) + return false; + if (!list_empty(&gl->gl_holders)) + return false; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) || + test_bit(GLF_DEMOTE, &gl->gl_flags)) + return false; + return true; +} + +/** + * gfs2_rgrp_congested - decide whether a rgrp glock is congested + * @rs: The reservation in question + * @loops: An indication of how picky we can be (0=very, 1=less so) + * + * There are two kinds of congestion: inter-node and intra-node. + * + * Inter-node congestion is where multiple nodes all want to allocate blocks + * inside the same rgrp, which means they need to trade the rgrp glock back + * and forth, which is a slow process. To mitigate this, we use glock + * statistics to predict whether the glock is historically fast to acquire. + * + * Intra-node congestion is where you have multiple processes on the same + * node, all trying to allocate blocks in the same rgrp. There's nothing wrong + * with doing so, but each process needs to wait for the other to release the + * rgrp glock before it may proceed. + * + * If we're not using inter-node locking (dlm) it doesn't make sense to check + * the glock statistics. 
Instead, we do some simple checks based on how + * desperate we are to get blocks (the number of loops). + * + * We know the number of loops we've been around, so we know how desperate we + * are to find something. On first loop, call it congested if anyone else has + * a block reservation. On second loop, call it congested if it's not fast to + * acquire. + */ +static bool gfs2_rgrp_congested(const struct gfs2_blkreserv *rs, int loops) +{ + const struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; + + /* We already have a reservation, we need to use it regardless */ + if (gfs2_rs_active(rs)) + return false; + + /* If we've rejected all the rgrps a few times, we can no longer worry + about whether the rgrp is congested. Fill in blocks where we can. */ + if (loops >= 2) + return false; + + if (loops == 0 && !fast_to_acquire(rgd)) + return true; + + /* Check for inter-node congestion */ + if (rgd->rd_sbd->sd_lockstruct.ls_ops->lm_lock) /* lock_dlm */ + return gfs2_rgrp_congested_dlm(rgd, loops); + + return false; +} + static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -2019,18 +2065,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rg_locked = 0; if (skip && skip--) goto next_rgrp; - if (!gfs2_rs_active(rs)) { - if ((loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) - goto next_rgrp; - } + if (gfs2_rgrp_congested(rs, loops)) + goto next_rgrp; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); if (unlikely(error)) return error; - if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops) + if (gfs2_rgrp_congested(rs, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); -- 2.14.3
