[Cluster-devel] [PATCH 09/11] gfs2: Remove unnecessary gfs2_rlist_alloc parameter

2018-10-05 Thread Andreas Gruenbacher
From: Bob Peterson 

The state parameter of gfs2_rlist_alloc is set to LM_ST_EXCLUSIVE in all
calls, so remove it and hardcode that state in gfs2_rlist_alloc instead.

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/dir.c   | 2 +-
 fs/gfs2/rgrp.c  | 5 ++---
 fs/gfs2/rgrp.h  | 2 +-
 fs/gfs2/xattr.c | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index e37002560c11..89c601e5e52f 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -2018,7 +2018,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
l_blocks++;
}
 
-   gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
+   gfs2_rlist_alloc(&rlist);
 
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ef6768bcff21..76a0a8073c11 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2697,13 +2697,12 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
  * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
  *  and initialize an array of glock holders for them
  * @rlist: the list of resource groups
- * @state: the lock state to acquire the RG lock in
  *
  * FIXME: Don't use NOFAIL
  *
  */
 
-void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
 {
unsigned int x;
 
@@ -2712,7 +2711,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
  GFP_NOFS | __GFP_NOFAIL);
for (x = 0; x < rlist->rl_rgrps; x++)
gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
-   state, 0,
+   LM_ST_EXCLUSIVE, 0,
&rlist->rl_ghs[x]);
 }
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 6bb5ee112324..09519ae10fb6 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -67,7 +67,7 @@ struct gfs2_rgrp_list {
 
 extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
   u64 block);
-extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
+extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
 extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
 extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 38515988aaf7..e11f77f080a0 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1299,7 +1299,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
else
goto out;
 
-   gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
+   gfs2_rlist_alloc(&rlist);
 
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
-- 
2.17.1



[Cluster-devel] [PATCH 05/11] gfs2: Fix some minor typos

2018-10-05 Thread Andreas Gruenbacher
Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/quota.c | 2 +-
 fs/gfs2/rgrp.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 0efae7a0ee80..2ae5a109eea7 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1183,7 +1183,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
  *
  * Returns: 0 on success.
  *  min_req = ap->min_target ? ap->min_target : ap->target;
- *  quota must allow atleast min_req blks for success and
+ *  quota must allow at least min_req blks for success and
  *  ap->allowed is set to the number of blocks allowed
  *
  *  -EDQUOT otherwise, quota violation. ap->allowed is set to number
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index aa96fd32eaf1..070ad493a4ec 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2022,7 +2022,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
  * We try our best to find an rgrp that has at least ap->target blocks
  * available. After a couple of passes (loops == 2), the prospects of finding
  * such an rgrp diminish. At this stage, we return the first rgrp that has
- * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * at least ap->min_target blocks available. Either way, we set ap->allowed to
  * the number of blocks available in the chosen rgrp.
  *
  * Returns: 0 on success,
@@ -2091,7 +2091,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
}
}
 
-   /* Skip unuseable resource groups */
+   /* Skip unusable resource groups */
if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
 GFS2_RDF_ERROR)) ||
(loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
-- 
2.17.1



[Cluster-devel] [PATCH 01/11] gfs2: Always check the result of gfs2_rbm_from_block

2018-10-05 Thread Andreas Gruenbacher
When gfs2_rbm_from_block fails, the rbm it returns is undefined, so we
always want to make sure gfs2_rbm_from_block has succeeded before
looking at the rbm.

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/rgrp.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fc181c81cca2..c9caddc2627c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2227,7 +2227,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
return NULL;
}
 
-   gfs2_rbm_from_block(&rbm, bstart);
+   BUG_ON(gfs2_rbm_from_block(&rbm, bstart));
while (blen--) {
bi = rbm_bi(&rbm);
if (bi != bi_prev) {
@@ -2360,7 +2360,7 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
else
goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
 
-   gfs2_rbm_from_block(rbm, goal);
+   BUG_ON(gfs2_rbm_from_block(rbm, goal));
 }
 
 /**
@@ -2569,7 +2569,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
 
rbm.rgd = rgd;
error = gfs2_rbm_from_block(&rbm, no_addr);
-   WARN_ON_ONCE(error != 0);
+   if (WARN_ON_ONCE(error))
+   goto fail;
 
if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
-- 
2.17.1



[Cluster-devel] [PATCH 07/11] gfs2: Fix marking bitmaps non-full

2018-10-05 Thread Andreas Gruenbacher
Reservations in gfs2 can span multiple gfs2_bitmaps (but they won't span
multiple resource groups).  When removing a reservation, we want to
clear the GBF_FULL flags of all involved gfs2_bitmaps, not just that of
the first bitmap.

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/rgrp.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ee6ea7d8cf44..ee981085db33 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -672,7 +672,7 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
RB_CLEAR_NODE(&rs->rs_node);
 
if (rs->rs_free) {
-   struct gfs2_bitmap *bi;
+   struct gfs2_bitmap *start, *last;
 
/* return reserved blocks to the rgrp */
BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
@@ -682,10 +682,15 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
   contiguous with a span of free blocks that follows. Still,
   it will force the number to be recalculated later. */
rgd->rd_extfail_pt += rs->rs_free;
+   start = gfs2_block_to_bitmap(rgd, rs->rs_start);
+   last = gfs2_block_to_bitmap(rgd,
+   rs->rs_start + rs->rs_free - 1);
rs->rs_free = 0;
-   bi = gfs2_block_to_bitmap(rgd, rs->rs_start);
-   if (bi)
-   clear_bit(GBF_FULL, &bi->bi_flags);
+   if (!start || !last)
+   return;
+   do
+   clear_bit(GBF_FULL, &start->bi_flags);
+   while (start++ != last);
}
 }
 
-- 
2.17.1



[Cluster-devel] [PATCH 04/11] gfs2: Rename bitmap.bi_{len => bytes}

2018-10-05 Thread Andreas Gruenbacher
This field indicates the size of the bitmap in bytes, similar to how the
bi_blocks field indicates the size of the bitmap in blocks.

In count_unlinked, replace an instance of bi_bytes * GFS2_NBBY by
bi_blocks.
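
For illustration, the relationship between the two fields boils down to
bi_blocks = bi_bytes * GFS2_NBBY (four blocks per bitmap byte, two bits
each).  A tiny userspace sketch with made-up block and header sizes --
not the real on-disk layout -- shows the arithmetic:

#include <stdio.h>

#define GFS2_NBBY 4	/* blocks described by each bitmap byte */

int main(void)
{
	unsigned int bsize = 4096;	/* assumed filesystem block size */
	unsigned int header = 128;	/* assumed rgrp/meta header size */
	unsigned int bi_bytes = bsize - header;
	unsigned int bi_blocks = bi_bytes * GFS2_NBBY;

	printf("bi_bytes=%u bi_blocks=%u\n", bi_bytes, bi_blocks);
	/* prints: bi_bytes=3968 bi_blocks=15872 */
	return 0;
}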

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/incore.h |  2 +-
 fs/gfs2/lops.c   |  2 +-
 fs/gfs2/rgrp.c   | 32 
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9d7d9bd8c3a9..a1771d8a93be 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -92,7 +92,7 @@ struct gfs2_bitmap {
unsigned long bi_flags;
u32 bi_offset;
u32 bi_start;
-   u32 bi_len;
+   u32 bi_bytes;
u32 bi_blocks;
 };
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f2567f958d00..4c7069b8f3c1 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -81,7 +81,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, 
NULL);
memcpy(bi->bi_clone + bi->bi_offset,
-  bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
+  bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
clear_bit(GBF_FULL, &bi->bi_flags);
rgd->rd_free_clone = rgd->rd_free;
rgd->rd_extfail_pt = rgd->rd_free;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 52e5a0f24c9f..aa96fd32eaf1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -90,7 +90,7 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
 {
unsigned char *byte1, *byte2, *end, cur_state;
struct gfs2_bitmap *bi = rbm_bi(rbm);
-   unsigned int buflen = bi->bi_len;
+   unsigned int buflen = bi->bi_bytes;
const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
@@ -105,8 +105,8 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
rbm->offset, cur_state, new_state);
pr_warn("rgrp=0x%llx bi_start=0x%x\n",
(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
-   pr_warn("bi_offset=0x%x bi_len=0x%x\n",
-   bi->bi_offset, bi->bi_len);
+   pr_warn("bi_offset=0x%x bi_bytes=0x%x\n",
+   bi->bi_offset, bi->bi_bytes);
dump_stack();
gfs2_consist_rgrpd(rbm->rgd);
return;
@@ -382,7 +382,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
if (bi->bi_clone)
start = bi->bi_clone;
start += bi->bi_offset;
-   end = start + bi->bi_len;
+   end = start + bi->bi_bytes;
BUG_ON(rbm.offset & 3);
start += (rbm.offset / GFS2_NBBY);
bytes = min_t(u32, len / GFS2_NBBY, (end - start));
@@ -467,7 +467,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
count[x] += gfs2_bitcount(rgd,
  bi->bi_bh->b_data +
  bi->bi_offset,
- bi->bi_len, x);
+ bi->bi_bytes, x);
}
 
if (count[0] != rgd->rd_free) {
@@ -780,21 +780,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
-   bi->bi_len = bytes;
+   bi->bi_bytes = bytes;
bi->bi_blocks = bytes * GFS2_NBBY;
/* header block */
} else if (x == 0) {
bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
-   bi->bi_len = bytes;
+   bi->bi_bytes = bytes;
bi->bi_blocks = bytes * GFS2_NBBY;
/* last block */
} else if (x + 1 == length) {
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
-   bi->bi_len = bytes;
+   bi->bi_bytes = bytes;
bi->bi_blocks = bytes * GFS2_NBBY;
/* other blocks */
} else {
@@ -802,7 +802,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
sizeof(struct gfs2_meta_header);
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
-   

[Cluster-devel] [PATCH 08/11] gfs2: Add per-reservation reserved block accounting

2018-10-05 Thread Andreas Gruenbacher
Add a rs_reserved field to struct gfs2_blkreserv to keep track of the
number of blocks reserved by this particular reservation.  When making a
reservation with gfs2_inplace_reserve, this field is set to somewhere
between ap->min_target and ap->target depending on the number of free
blocks in the resource group.  When allocating blocks with
gfs2_alloc_blocks, rs_reserved is decremented accordingly.  Eventually,
any reserved but not consumed blocks are returned to the resource group
by gfs2_inplace_release (via gfs2_adjust_reservation).

The reservation tree (rd_rstree) is unaffected by this change: the
reservations it tracks are still advisory, and the sizes of those
reservations (rs_free) are still determined by the tentative allocation
sizes (i_sizehint).  Since rd_reserved now tracks the number of reserved
blocks rather than the number of tentatively reserved blocks, we may
end up with slightly different allocation patterns, though. The
rd_extfail_pt optimization will still cause ill-suited resource groups
to be skipped quickly.

We expect to augment this with a patch that will reserve an extent of
blocks rather than just reserving a number of blocks in
gfs2_inplace_reserve.  gfs2_alloc_blocks will then be able to consume
that reserved extent before scanning for additional available blocks;
this should eliminate double bitmap scanning in most cases.
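
To make the intended accounting easier to follow, here is a minimal
userspace model of the rs_reserved lifecycle; the helper names and the
single rgrp_free number are simplifications, and the real functions also
update the rgrp-side counters and take the necessary locks:

#include <stdio.h>

typedef unsigned int u32;

struct blkreserv { u32 rs_reserved; };

/* gfs2_inplace_reserve(): grant between min_target and target blocks,
 * limited by what the chosen resource group can offer. */
static int inplace_reserve(struct blkreserv *rs, u32 rgrp_free,
			   u32 min_target, u32 target)
{
	u32 grant = target < rgrp_free ? target : rgrp_free;

	if (grant < min_target)
		return -1;		/* caller tries the next rgrp */
	rs->rs_reserved = grant;
	return 0;
}

/* gfs2_alloc_blocks(): each allocation consumes part of the grant. */
static void alloc_blocks(struct blkreserv *rs, u32 n)
{
	rs->rs_reserved -= n;
}

/* gfs2_inplace_release(): whatever was granted but never allocated is
 * handed back to the resource group (modeled here as a return value). */
static u32 inplace_release(struct blkreserv *rs)
{
	u32 unused = rs->rs_reserved;

	rs->rs_reserved = 0;
	return unused;
}

int main(void)
{
	struct blkreserv rs = { 0 };

	if (inplace_reserve(&rs, 100, 8, 32) == 0) {
		alloc_blocks(&rs, 20);	/* a write consumes 20 blocks */
		printf("returned to rgrp: %u\n", inplace_release(&rs));
	}
	return 0;
}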

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/file.c   |  4 +--
 fs/gfs2/incore.h |  1 +
 fs/gfs2/rgrp.c   | 66 +---
 fs/gfs2/trace_gfs2.h |  8 --
 4 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e8864ff2ed03..12c19e3fcb1b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1008,8 +1008,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out_qunlock;
 
/* check if the selected rgrp limits our max_blks further */
-   if (ap.allowed && ap.allowed < max_blks)
-   max_blks = ap.allowed;
+   if (ip->i_res.rs_reserved < max_blks)
+   max_blks = ip->i_res.rs_reserved;
 
/* Almost done. Calculate bytes that can be written using
 * max_blks. We also recompute max_bytes, data_blocks and
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0ed28fbc73b4..932e63924f7e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -288,6 +288,7 @@ struct gfs2_blkreserv {
struct gfs2_rgrpd *rs_rgd;
u64 rs_start; /* start of reservation */
u32 rs_free;  /* how many blocks are still free */
+   u32 rs_reserved;  /* number of reserved blocks */
 };
 
 /*
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ee981085db33..ef6768bcff21 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -674,9 +674,6 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
if (rs->rs_free) {
struct gfs2_bitmap *start, *last;
 
-   /* return reserved blocks to the rgrp */
-   BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
-   rs->rs_rgd->rd_reserved -= rs->rs_free;
/* The rgrp extent failure point is likely not to increase;
   it will only do so if the freed blocks are somehow
   contiguous with a span of free blocks that follows. Still,
@@ -1543,39 +1540,27 @@ static void rs_insert(struct gfs2_inode *ip)
 
rb_link_node(&rs->rs_node, parent, newn);
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
-
-   /* Do our rgrp accounting for the reservation */
-   rgd->rd_reserved += rs->rs_free; /* blocks reserved */
spin_unlock(&rgd->rd_rsspin);
trace_gfs2_rs(rs, TRACE_RS_INSERT);
 }
 
 /**
- * rgd_free - return the number of free blocks we can allocate.
+ * rgd_free - compute the number of blocks we can allocate
  * @rgd: the resource group
  *
- * This function returns the number of free blocks for an rgrp.
- * That's the clone-free blocks (blocks that are free, not including those
- * still being used for unlinked files that haven't been deleted.)
- *
- * It also subtracts any blocks reserved by someone else, but does not
- * include free blocks that are still part of our current reservation,
- * because obviously we can (and will) allocate them.
+ * Compute the number of blocks we can allocate in @rgd.  That's the clone-free
+ * blocks (blocks that are free, not including those still being used for
+ * unlinked files that haven't been deleted) minus the blocks currently
+ * reserved by any reservations other than @rs.
  */
 static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
 {
-   u32 tot_reserved, tot_free;
-
-   if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
-   return 0;
-   tot_reserved = rgd->rd_reserved - rs->rs_free;
+   u32 free;
 
-   if 

[Cluster-devel] [PATCH 06/11] gfs2: Only use struct gfs2_rbm for bitmap manipulations

2018-10-05 Thread Andreas Gruenbacher
GFS2 uses struct gfs2_rbm to represent a filesystem block number as a
bit position within a resource group.  This representation is used in
the bitmap manipulation code to prevent excessive conversions between
block numbers and bit positions, but also in struct gfs2_blkreserv, which
is part of struct gfs2_inode, to mark the start of a reservation.  In
the inode, the bit position representation makes less sense: first, the
start position is used as a block number about as often as a bit
position; second, the bit position representation makes the code
unnecessarily difficult to read.

Therefore, change struct gfs2_blkreserv to represent the start of a
reservation as a block number instead of a bit position.  (This requires
keeping track of the resource group in gfs2_blkreserv separately.) With
that change, various things can be slightly simplified, and struct
gfs2_rbm can be moved to rgrp.c.
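
As a sketch of the kind of simplification this enables (not the patch
itself): once the start is a block number, range comparisons such as
rs_cmp() no longer have to convert with gfs2_rbm_to_block() and can
compare against rs_start directly.

#include <stdint.h>

struct blkreserv_sketch {
	uint64_t rs_start;	/* first block of the reservation */
	uint32_t rs_free;	/* number of blocks in the reservation */
};

/* Returns 1 if [start, start + len) lies entirely after the reservation,
 * -1 if it lies entirely before it, and 0 if the two ranges overlap. */
static inline int rs_cmp_sketch(uint64_t start, uint32_t len,
				const struct blkreserv_sketch *rs)
{
	if (start >= rs->rs_start + rs->rs_free)
		return 1;
	if (rs->rs_start >= start + len)
		return -1;
	return 0;
}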

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/bmap.c   |   2 +-
 fs/gfs2/incore.h |  30 +
 fs/gfs2/rgrp.c   | 156 ++-
 fs/gfs2/trace_gfs2.h |  10 +--
 fs/gfs2/trans.h  |   2 +-
 5 files changed, 103 insertions(+), 97 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 03128ed1f34e..c192906bb5f6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1503,7 +1503,7 @@ static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
 
/* Must be done with the rgrp glock held: */
if (gfs2_rs_active(&ip->i_res) &&
-   rgd == ip->i_res.rs_rbm.rgd)
+   rgd == ip->i_res.rs_rgd)
gfs2_rs_deltree(&ip->i_res);
}
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a1771d8a93be..0ed28fbc73b4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -124,31 +124,6 @@ struct gfs2_rgrpd {
struct rb_root rd_rstree;   /* multi-block reservation tree */
 };
 
-struct gfs2_rbm {
-   struct gfs2_rgrpd *rgd;
-   u32 offset; /* The offset is bitmap relative */
-   int bii;/* Bitmap index */
-};
-
-static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
-{
-   return rbm->rgd->rd_bits + rbm->bii;
-}
-
-static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
-{
-   BUG_ON(rbm->offset >= rbm->rgd->rd_data);
-   return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
-   rbm->offset;
-}
-
-static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
-  const struct gfs2_rbm *rbm2)
-{
-   return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) &&
-  (rbm1->offset == rbm2->offset);
-}
-
 enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
@@ -309,8 +284,9 @@ struct gfs2_qadata { /* quota allocation data */
 */
 
 struct gfs2_blkreserv {
-   struct rb_node rs_node;   /* link to other block reservations */
-   struct gfs2_rbm rs_rbm;   /* Start of reservation */
+   struct rb_node rs_node;   /* node within rd_rstree */
+   struct gfs2_rgrpd *rs_rgd;
+   u64 rs_start; /* start of reservation */
u32 rs_free;  /* how many blocks are still free */
 };
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 070ad493a4ec..ee6ea7d8cf44 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -49,6 +49,24 @@
 #define LBITSKIP00 (0xUL)
 #endif
 
+struct gfs2_rbm {
+   struct gfs2_rgrpd *rgd;
+   u32 offset; /* The offset is bitmap relative */
+   int bii;/* Bitmap index */
+};
+
+static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
+{
+   return rbm->rgd->rd_bits + rbm->bii;
+}
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+   BUG_ON(rbm->offset >= rbm->rgd->rd_data);
+   return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
+   rbm->offset;
+}
+
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation.  Each block is represented by two
@@ -184,7 +202,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 
 /**
  * rs_cmp - multi-block reservation range compare
- * @blk: absolute file system block number of the new reservation
+ * @start: start of the new reservation
  * @len: number of blocks in the new reservation
  * @rs: existing reservation to compare against
  *
@@ -192,13 +210,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
  * -1 if the block range is before the start of the reservation
  *  0 if the block range overlaps with the reservation
  */
-static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs)
 {
-   u64 

[Cluster-devel] [PATCH 11/11] gfs2: Add local resource group locking

2018-10-05 Thread Andreas Gruenbacher
From: Bob Peterson 

Prepare for treating resource group glocks as exclusive among nodes but
shared among all tasks running on a node: introduce another layer of
node-specific locking that the local tasks can use to coordinate their
accesses.

This patch only introduces the local locking changes necessary so that
future patches can introduce resource group glock sharing.  We replace
the resource group spinlock with a mutex; whether that leads to
noticeable additional contention on the resource group mutex remains to
be seen.
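
The hunks below use rgrp_lock_local() and rgrp_unlock_local(); their
definitions fall outside the quoted hunks, but given the new rd_lock
mutex they are presumably little more than the following sketch:

static inline void rgrp_lock_local(struct gfs2_rgrpd *rgd)
{
	mutex_lock(&rgd->rd_lock);
}

static inline void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
{
	mutex_unlock(&rgd->rd_lock);
}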

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/incore.h |  3 +-
 fs/gfs2/lops.c   |  5 ++-
 fs/gfs2/rgrp.c   | 97 +++-
 fs/gfs2/rgrp.h   |  4 ++
 4 files changed, 81 insertions(+), 28 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 932e63924f7e..2fa47b476eef 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DIO_WAIT   0x0010
 #define DIO_METADATA   0x0020
@@ -120,7 +121,7 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_ERROR 0x4000 /* error in rg */
 #define GFS2_RDF_PREFERRED 0x8000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK  0xf000 /* mask for internal flags */
-   spinlock_t rd_rsspin;   /* protects reservation related vars */
+   struct mutex rd_lock;
struct rb_root rd_rstree;   /* multi-block reservation tree */
 };
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4c7069b8f3c1..a9e858e01c97 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
 
+   rgrp_lock_local(rgd);
if (bi->bi_clone == NULL)
-   return;
+   goto out;
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, 
NULL);
memcpy(bi->bi_clone + bi->bi_offset,
@@ -85,6 +86,8 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
clear_bit(GBF_FULL, &bi->bi_flags);
rgd->rd_free_clone = rgd->rd_free;
rgd->rd_extfail_pt = rgd->rd_free;
+out:
+   rgrp_unlock_local(rgd);
 }
 
 /**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8a6b41f3667c..a89be4782c15 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -702,10 +702,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 
rgd = rs->rs_rgd;
if (rgd) {
-   spin_lock(&rgd->rd_rsspin);
+   rgrp_lock_local(rgd);
__rs_deltree(rs);
BUG_ON(rs->rs_free);
-   spin_unlock(&rgd->rd_rsspin);
+   rgrp_unlock_local(rgd);
}
 }
 
@@ -737,12 +737,12 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
struct rb_node *n;
struct gfs2_blkreserv *rs;
 
-   spin_lock(&rgd->rd_rsspin);
+   rgrp_lock_local(rgd);
while ((n = rb_first(&rgd->rd_rstree))) {
rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
__rs_deltree(rs);
}
-   spin_unlock(&rgd->rd_rsspin);
+   rgrp_unlock_local(rgd);
 }
 
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
@@ -948,7 +948,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
-   spin_lock_init(&rgd->rd_rsspin);
+   mutex_init(&rgd->rd_lock);
 
error = compute_bitstructs(rgd);
if (error)
@@ -1469,9 +1469,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
+   rgrp_lock_local(rgd);
ret = gfs2_rgrp_send_discards(sdp,
rgd->rd_data0, NULL, bi, minlen,
&trimmed);
+   rgrp_unlock_local(rgd);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@@ -1483,9 +1485,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
bh = rgd->rd_bits[0].bi_bh;
+   rgrp_lock_local(rgd);
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
+   rgrp_unlock_local(rgd);

[Cluster-devel] [PATCH 02/11] gfs2: Move rs_{sizehint, rgd_gh} fields into the inode

2018-10-05 Thread Andreas Gruenbacher
Move the rs_sizehint and rs_rgd_gh fields from struct gfs2_blkreserv
into the inode: they are more closely related to the inode than to a
particular reservation.

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/file.c   |  4 ++--
 fs/gfs2/incore.h |  6 ++
 fs/gfs2/main.c   |  2 ++
 fs/gfs2/rgrp.c   | 16 +++-
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 08369c6cd127..e8864ff2ed03 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -347,8 +347,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> 
sdp->sd_sb.sb_bsize_shift;
int hint = min_t(size_t, INT_MAX, blks);
 
-   if (hint > atomic_read(&ip->i_res.rs_sizehint))
-   atomic_set(&ip->i_res.rs_sizehint, hint);
+   if (hint > atomic_read(&ip->i_sizehint))
+   atomic_set(&ip->i_sizehint, hint);
 }
 
 /**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b96d39c28e17..9d7d9bd8c3a9 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -309,10 +309,6 @@ struct gfs2_qadata { /* quota allocation data */
 */
 
 struct gfs2_blkreserv {
-   /* components used during write (step 1): */
-   atomic_t rs_sizehint; /* hint of the write size */
-
-   struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node;   /* link to other block reservations */
struct gfs2_rbm rs_rbm;   /* Start of reservation */
u32 rs_free;  /* how many blocks are still free */
@@ -417,8 +413,10 @@ struct gfs2_inode {
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_qadata *i_qadata; /* quota allocation data */
+   struct gfs2_holder i_rgd_gh;
struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
u64 i_goal; /* goal block for allocations */
+   atomic_t i_sizehint;  /* hint of the write size */
struct rw_semaphore i_rw_mutex;
struct list_head i_ordered;
struct list_head i_trunc_list;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 2d55e2cc..c7603063f861 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -39,9 +39,11 @@ static void gfs2_init_inode_once(void *foo)
struct gfs2_inode *ip = foo;
 
inode_init_once(&ip->i_inode);
+   atomic_set(&ip->i_sizehint, 0);
init_rwsem(&ip->i_rw_mutex);
INIT_LIST_HEAD(&ip->i_trunc_list);
ip->i_qadata = NULL;
+   gfs2_holder_mark_uninitialized(&ip->i_rgd_gh);
memset(&ip->i_res, 0, sizeof(ip->i_res));
RB_CLEAR_NODE(&ip->i_res.rs_node);
ip->i_hash_cache = NULL;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c9caddc2627c..34122c546576 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1564,7 +1564,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
if (S_ISDIR(inode->i_mode))
extlen = 1;
else {
-   extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
+   extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
}
if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
@@ -2076,7 +2076,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
}
error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
   LM_ST_EXCLUSIVE, flags,
-  &rs->rs_rgd_gh);
+  &ip->i_rgd_gh);
if (unlikely(error))
return error;
if (!gfs2_rs_active(rs) && (loops < 2) &&
@@ -2085,7 +2085,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
if (sdp->sd_args.ar_rgrplvb) {
error = update_rgrp_lvb(rs->rs_rbm.rgd);
if (unlikely(error)) {
-   gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+   gfs2_glock_dq_uninit(&ip->i_rgd_gh);
return error;
}
}
@@ -2128,7 +2128,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 
/* Unlock rgrp if required */
if (!rg_locked)
-   gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+   gfs2_glock_dq_uninit(&ip->i_rgd_gh);
 next_rgrp:
/* Find the next rgrp, and continue looking */
if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
@@ -2165,10 +2165,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 
 void 

[Cluster-devel] [PATCH 03/11] gfs2: Remove unused RGRP_RSRV_MINBYTES definition

2018-10-05 Thread Andreas Gruenbacher
This definition is only used to define RGRP_RSRV_MINBLKS, with no
benefit over defining RGRP_RSRV_MINBLKS directly.

In addition, instead of forcing RGRP_RSRV_MINBLKS to be of type u32,
cast it to that type where that type is required.

Signed-off-by: Andreas Gruenbacher 
---
 fs/gfs2/rgrp.c | 2 +-
 fs/gfs2/rgrp.h | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 34122c546576..52e5a0f24c9f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1565,7 +1565,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
extlen = 1;
else {
extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
-   extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+   extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
}
if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index e90478e2f545..6bb5ee112324 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -18,8 +18,7 @@
  * By reserving 32 blocks at a time, we can optimize / shortcut how we search
  * through the bitmaps by looking a word at a time.
  */
-#define RGRP_RSRV_MINBYTES 8
-#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+#define RGRP_RSRV_MINBLKS 32
 #define RGRP_RSRV_ADDBLKS 64
 
 struct gfs2_rgrpd;
-- 
2.17.1



[Cluster-devel] [PATCH 00/11] gfs2: Prepare for resource group glock sharing

2018-10-05 Thread Andreas Gruenbacher
Here is a set of patches that are meant to prepare gfs2 for Bob's
resource group glock sharing patches.  The patches have only survived
light testing so far, and Bob's remaining patches haven't been ported in
top of this patch queue yet, so I'm mainly looking for feedback on the
mode of operation of these patches at this point.

Thanks,
Andreas

Andreas Gruenbacher (9):
  gfs2: Always check the result of gfs2_rbm_from_block
  gfs2: Move rs_{sizehint, rgd_gh} fields into the inode
  gfs2: Remove unused RGRP_RSRV_MINBYTES definition
  gfs2: Rename bitmap.bi_{len => bytes}
  gfs2: Fix some minor typos
  gfs2: Only use struct gfs2_rbm for bitmap manipulations
  gfs2: Fix marking bitmaps non-full
  gfs2: Add per-reservation reserved block accounting
  gfs2: Pass resource group to rgblk_free

Bob Peterson (2):
  gfs2: Remove unnecessary gfs2_rlist_alloc parameter
  gfs2: Add local resource group locking

 fs/gfs2/bmap.c   |   6 +-
 fs/gfs2/dir.c|   7 +-
 fs/gfs2/file.c   |   8 +-
 fs/gfs2/incore.h |  42 +
 fs/gfs2/lops.c   |   7 +-
 fs/gfs2/main.c   |   2 +
 fs/gfs2/quota.c  |   2 +-
 fs/gfs2/rgrp.c   | 412 +--
 fs/gfs2/rgrp.h   |  15 +-
 fs/gfs2/trace_gfs2.h |  18 +-
 fs/gfs2/trans.h  |   2 +-
 fs/gfs2/xattr.c  |  18 +-
 12 files changed, 304 insertions(+), 235 deletions(-)

-- 
2.17.1



[Cluster-devel] [GFS2 PATCH] gfs2: Use fs_* functions instead of pr_* where we can

2018-10-05 Thread Bob Peterson
Hi,

Earlier this week, I posted a patch that converted a few pr_warn
messages to fs_warn. Following Steve Whitehouse's suggestion, I
extended this concept and converted almost all such messages.
Here, then, is the replacement patch.

Bob Peterson
---
Before this patch, various errors and messages were reported using
the pr_* functions: pr_err, pr_warn, pr_info, etc., but that does
not tell you which gfs2 mount had the problem, which is often vital
to debugging. This patch changes the calls from pr_* to fs_* in
most of the messages so that the file system id is printed along
with the message.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/dir.c  | 21 -
 fs/gfs2/glock.c| 17 +
 fs/gfs2/lock_dlm.c | 10 +-
 fs/gfs2/rgrp.c | 28 +---
 fs/gfs2/trans.c| 15 ---
 fs/gfs2/util.h |  2 +-
 6 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index e37002560c11..2e28fc947f7f 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -506,7 +506,8 @@ static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
  * For now the most important thing is to check that the various sizes
  * are correct.
  */
-static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
+static int gfs2_check_dirent(struct gfs2_sbd *sdp,
+struct gfs2_dirent *dent, unsigned int offset,
 unsigned int size, unsigned int len, int first)
 {
const char *msg = "gfs2_dirent too small";
@@ -528,12 +529,12 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
goto error;
return 0;
 error:
-   pr_warn("%s: %s (%s)\n",
+   fs_warn(sdp, "%s: %s (%s)\n",
__func__, msg, first ? "first in block" : "not first in block");
return -EIO;
 }
 
-static int gfs2_dirent_offset(const void *buf)
+static int gfs2_dirent_offset(struct gfs2_sbd *sdp, const void *buf)
 {
const struct gfs2_meta_header *h = buf;
int offset;
@@ -552,7 +553,8 @@ static int gfs2_dirent_offset(const void *buf)
}
return offset;
 wrong_type:
-   pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type));
+   fs_warn(sdp, "%s: wrong block type %u\n", __func__,
+   be32_to_cpu(h->mh_type));
return -1;
 }
 
@@ -566,7 +568,7 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
unsigned size;
int ret = 0;
 
-   ret = gfs2_dirent_offset(buf);
+   ret = gfs2_dirent_offset(GFS2_SB(inode), buf);
if (ret < 0)
goto consist_inode;
 
@@ -574,7 +576,7 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
prev = NULL;
dent = buf + offset;
size = be16_to_cpu(dent->de_rec_len);
-   if (gfs2_check_dirent(dent, offset, size, len, 1))
+   if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1))
goto consist_inode;
do {
ret = scan(dent, name, opaque);
@@ -586,7 +588,8 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
prev = dent;
dent = buf + offset;
size = be16_to_cpu(dent->de_rec_len);
-   if (gfs2_check_dirent(dent, offset, size, len, 0))
+   if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size,
+ len, 0))
goto consist_inode;
} while(1);
 
@@ -1043,7 +1046,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
half_len = len >> 1;
if (!half_len) {
-   pr_warn("i_depth %u lf_depth %u index %u\n",
+   fs_warn(GFS2_SB(inode), "i_depth %u lf_depth %u index %u\n",
dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
gfs2_consist_inode(dip);
error = -EIO;
@@ -1351,7 +1354,7 @@ static int gfs2_set_cookies(struct gfs2_sbd *sdp, struct buffer_head *bh,
if (!sdp->sd_args.ar_loccookie)
continue;
offset = (char *)(darr[i]) -
-(bh->b_data + gfs2_dirent_offset(bh->b_data));
+   (bh->b_data + gfs2_dirent_offset(sdp, bh->b_data));
offset /= GFS2_MIN_DIRENT_SIZE;
offset += leaf_nr * sdp->sd_max_dents_per_leaf;
if (offset >= GFS2_USE_HASH_FLAG ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4614ee25f621..05431324b262 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -494,7 +494,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
do_xmote(gl, gh, LM_ST_UNLOCKED);
break;
default: /* Everything else */
-