Add a new rg_skip field to struct gfs2_rgrp, replacing __pad. The
rg_skip field has the following meaning:

- If rg_skip is zero, it is considered unset and not useful.
- If rg_skip is non-zero, its value will be the number of blocks between
  this rgrp's address and the next rgrp's address. This can be used as a
  hint by fsck.gfs2 when rebuilding a bad rindex, for example.

When gfs2_rgrp_bh_get() reads a resource group header and finds rg_skip
to be 0, it will attempt to set it to the difference between its rd_addr
and the rd_addr of the next resource group.

The only special case is the final rgrp, which always has a rg_skip of
0. It is not set to a special value (like -1) because, when the
filesystem is grown, the rgrp will no longer be the final one and it
will then need to have its rg_skip field set. The overhead of this
special case is a gfs2_rgrpd_get_next() call each time
gfs2_rgrp_bh_get() is called for the final resource group.

For the other resource groups, if the rg_skip field is 0, it is set
appropriately and then the only overhead becomes the rgd->rd_skip == 0
comparison in gfs2_rgrp_bh_get().

Before this patch, gfs2_rgrp_out() zeroes the __pad field explicitly, so
the rg_skip field can get set back to 0 in cases where nodes with and
without this patch are mixed in a cluster. In some cases, the field may
bounce between being set by one node and then zeroed by another which
may harm performance slightly, e.g. when two nodes create many small
files. In testing this situation is rare but it becomes more likely as
the filesystem fills up and there are fewer resource groups to choose
from. The problem goes away when all nodes are running with this patch.
Dipping into the space currently occupied by the rg_reserved field would
have resulted in the same problem as it is also explicitly zeroed, so
unfortunately there is no other way around it.

Signed-off-by: Andrew Price <anpr...@redhat.com>
---
 fs/gfs2/incore.h                 |  1 +
 fs/gfs2/rgrp.c                   | 27 ++++++++++++++++++++++++++-
 include/uapi/linux/gfs2_ondisk.h |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 845fb09..84cc1fd 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -88,6 +88,7 @@ struct gfs2_rgrpd {
        u32 rd_reserved;                /* number of blocks reserved */
        u32 rd_free_clone;
        u32 rd_dinodes;
+       u32 rd_skip;                    /* Distance to the next rgrp in fs blocks */
        u64 rd_igeneration;
        struct gfs2_bitmap *rd_bits;
        struct gfs2_sbd *rd_sbd;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07c0265..9779258 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1048,6 +1048,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
        rgd->rd_flags |= rg_flags;
        rgd->rd_free = be32_to_cpu(str->rg_free);
        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
+       rgd->rd_skip = be32_to_cpu(str->rg_skip);
        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 }
 
@@ -1058,7 +1059,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
        str->rg_free = cpu_to_be32(rgd->rd_free);
        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
-       str->__pad = cpu_to_be32(0);
+       str->rg_skip = cpu_to_be32(rgd->rd_skip);
        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
@@ -1118,6 +1119,28 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
        return count;
 }
 
+/**
+ * gfs2_rgrp_set_skip - Set the rg_skip field if this isn't the final rgrp
+ * @rgd: The resource group whose header is updated (best effort, no error)
+ */
+static void gfs2_rgrp_set_skip(struct gfs2_rgrpd *rgd)
+{
+       struct gfs2_sbd *sdp = rgd->rd_sbd;
+       struct buffer_head *bh = rgd->rd_bits[0].bi_bh;
+       struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
+
+       if (next == NULL || next->rd_addr <= rgd->rd_addr)
+               return;
+
+       if (gfs2_trans_begin(sdp, RES_RG_HDR, 0) != 0)
+               return;
+
+       rgd->rd_skip = next->rd_addr - rgd->rd_addr;
+       gfs2_trans_add_meta(rgd->rd_gl, bh);
+       gfs2_rgrp_out(rgd, bh->b_data);
+       gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
+       gfs2_trans_end(sdp);
+}
 
 /**
  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
@@ -1183,6 +1206,8 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
                if (rgd->rd_rgl->rl_unlinked == 0)
                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        }
+       if (rgd->rd_skip == 0)
+               gfs2_rgrp_set_skip(rgd);
        return 0;
 
 fail:
diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h
index 7c4be77..a35c26c 100644
--- a/include/uapi/linux/gfs2_ondisk.h
+++ b/include/uapi/linux/gfs2_ondisk.h
@@ -186,7 +186,7 @@ struct gfs2_rgrp {
        __be32 rg_flags;
        __be32 rg_free;
        __be32 rg_dinodes;
-       __be32 __pad;
+       __be32 rg_skip;       /* Distance to the next rgrp in fs blocks */
        __be64 rg_igeneration;
 
        __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
-- 
2.4.3

Reply via email to