Hi,

Here is my replacement GFS2 file defragmentation patch for upstream:

Description:

This patch reduces GFS2 file fragmentation by pre-reserving blocks.
In this patch, I've implemented almost all of Steve's suggestions.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d957a8..205eddc 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct 
buffer_head *dibh,
        if (error)
                goto out_rlist;
 
+       if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock 
held */
+               gfs2_rs_treedel(ip->i_res, true);
+
        error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
                                 RES_INDIRECT + RES_STATFS + RES_QUOTA,
                                 revokes);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6fbf3cb..779b6222 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, 
struct vm_fault *vmf)
        if (ret)
                return ret;
 
+       atomic_set(&ip->i_res->rs_sizehint,
+                  PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
+
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
@@ -571,22 +574,15 @@ fail:
 
 static int gfs2_release(struct inode *inode, struct file *file)
 {
-       struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-       struct gfs2_file *fp;
        struct gfs2_inode *ip = GFS2_I(inode);
 
-       fp = file->private_data;
+       kfree(file->private_data);
        file->private_data = NULL;
 
-       if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
-           (atomic_read(&inode->i_writecount) == 1))
+       if ((file->f_mode & FMODE_WRITE) && gfs2_rs_active(ip->i_res) &&
+           (atomic_read(&inode->i_writecount) == 1)) {
                gfs2_rs_delete(ip);
-
-       if (gfs2_assert_warn(sdp, fp))
-               return -EIO;
-
-       kfree(fp);
-
+       }
        return 0;
 }
 
@@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, 
const struct iovec *iov,
                                   unsigned long nr_segs, loff_t pos)
 {
        struct file *file = iocb->ki_filp;
+       size_t writesize = iov_length(iov, nr_segs);
        struct dentry *dentry = file->f_dentry;
        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_sbd *sdp;
        int ret;
 
+       sdp = GFS2_SB(file->f_mapping->host);
        ret = gfs2_rs_alloc(ip);
        if (ret)
                return ret;
 
+       atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
        if (file->f_flags & O_APPEND) {
                struct gfs2_holder gh;
 
@@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, 
loff_t offset,
        if (unlikely(error))
                goto out_uninit;
 
+       atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
+
        while (len > 0) {
                if (len < bytes)
                        bytes = len;
@@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, 
loff_t offset,
                        offset += bytes;
                        continue;
                }
-               error = gfs2_rindex_update(sdp);
-               if (error)
-                       goto out_unlock;
-
                error = gfs2_quota_lock_check(ip);
                if (error)
                        goto out_unlock;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dc73070..f7bd2e3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -84,6 +84,7 @@ struct gfs2_rgrpd {
        u32 rd_data;                    /* num of data blocks in rgrp */
        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        u32 rd_free;
+       u32 rd_reserved;                /* number of blocks reserved */
        u32 rd_free_clone;
        u32 rd_dinodes;
        u64 rd_igeneration;
@@ -91,11 +92,15 @@ struct gfs2_rgrpd {
        struct gfs2_sbd *rd_sbd;
        struct gfs2_rgrp_lvb *rd_rgl;
        u32 rd_last_alloc;
+       u32 rd_next_rsrv;               /* preferred next reservation */
        u32 rd_flags;
 #define GFS2_RDF_CHECK         0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE      0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR         0x40000000 /* error in rg */
 #define GFS2_RDF_MASK          0xf0000000 /* mask for internal flags */
+       struct mutex rd_mutex;          /* protects reservation related vars */
+       struct rb_root rd_rstree;       /* multi-block reservation tree */
+       u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
 enum gfs2_state_bits {
@@ -233,6 +238,41 @@ struct gfs2_holder {
        unsigned long gh_ip;
 };
 
+/* Resource group multi-block reservation, in order of appearance:
+
+   Step 1. Function prepares to write, allocates a mb, sets the size hint.
+   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
+   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
+   Step 4. Bits are assigned from the rgrp based on either the reservation
+           or wherever it can.
+*/
+
+struct gfs2_blkreserv {
+       /* components used during write (step 1): */
+       u64 rs_inum; /* inode address */
+       atomic_t rs_sizehint;         /* hint of the write size */
+
+       /* components used during inplace_reserve (step 2): */
+       u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+
+       /* components used during get_local_rgrp (step 3): */
+       struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+       struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
+       struct rb_node rs_node;       /* link to other block reservations */
+
+       /* components used during block searches and assignments (step 4): */
+       struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
+       u64 rs_start;                 /* absolute fs block address */
+       u32 rs_biblk;                 /* start block relative to the bi */
+       u32 rs_blks;                  /* number of blocks reserved */
+       u32 rs_free;                  /* how many blocks are still free */
+
+       /* ancillary quota stuff */
+       struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+       struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+       unsigned int rs_qa_qd_num;
+};
+
 enum {
        GLF_LOCK                        = 1,
        GLF_DEMOTE                      = 3,
@@ -290,16 +330,6 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32   /* Min size of LVB that gfs2 supports */
 
-struct gfs2_blkreserv {
-       u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-       struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
-
-       /* ancillary quota stuff */
-       struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
-       struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
-       unsigned int rs_qa_qd_num;
-};
-
 enum {
        GIF_INVALID             = 0,
        GIF_QD_LOCKED           = 1,
@@ -307,7 +337,6 @@ enum {
        GIF_SW_PAGED            = 3,
 };
 
-
 struct gfs2_inode {
        struct inode i_inode;
        u64 i_no_addr;
@@ -318,7 +347,7 @@ struct gfs2_inode {
        struct gfs2_glock *i_gl; /* Move into i_gh? */
        struct gfs2_holder i_iopen_gh;
        struct gfs2_holder i_gh; /* for prepare/commit_write only */
-       struct gfs2_blkreserv *i_res; /* resource group block reservation */
+       struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
        struct gfs2_rgrpd *i_rgd;
        u64 i_goal;     /* goal block for allocations */
        struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b035e0..12c7b6a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct 
gfs2_glock *gl,
        int error;
 
        munge_mode_uid_gid(dip, &mode, &uid, &gid);
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
 
        error = gfs2_quota_lock(dip, uid, gid);
        if (error)
@@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const 
struct qstr *name,
        struct buffer_head *dibh;
        int error;
 
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
+
        error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
                goto fail;
@@ -596,7 +603,8 @@ fail_end_trans:
        gfs2_trans_end(sdp);
 
 fail_ipreserv:
-       gfs2_inplace_release(dip);
+       if (alloc_required)
+               gfs2_inplace_release(dip);
 
 fail_quota_locks:
        gfs2_quota_unlock(dip);
@@ -738,6 +746,7 @@ fail_gunlock:
                iput(inode);
        }
 fail:
+       gfs2_rs_delete(dip);
        if (bh)
                brelse(bh);
        return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e53d0a1..4d1f4f8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#define RSRV_CONTENTION_FACTOR 2
+#define RGRP_RSRV_MAX_CONTENDERS 4
+
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -178,6 +181,60 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 
mask, u8 state)
 }
 
 /**
+ * rs_cmp - multi-block reservation range compare
+ * @blk: absolute file system block number of the new reservation
+ * @len: number of blocks in the new reservation
+ * @rs: existing reservation to compare against
+ *
+ * returns: 1 if the block range is beyond the reach of the reservation
+ *         -1 if the block range is before the start of the reservation
+ *          0 if the block range overlaps with the reservation
+ */
+static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+{
+       if (blk > rs->rs_start + rs->rs_blks - 1)
+               return 1;
+       if (blk + len - 1 < rs->rs_start)
+               return -1;
+       return 0;
+}
+
+/**
+ * rs_find - Find a rgrp multi-block reservation that contains a given block
+ * @rgd: The rgrp
+ * @rgblk: The block we're looking for, relative to the rgrp
+ */
+static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
+{
+       struct rb_node **newn;
+       int rc;
+       u64 fsblk = rgblk + rgd->rd_data0;
+
+       mutex_lock(&rgd->rd_mutex);
+       newn = &rgd->rd_rstree.rb_node;
+       while (*newn) {
+               struct gfs2_blkreserv *cur =
+                       rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+               rc = rs_cmp(fsblk, 1, cur);
+               if (rc < 0)
+                       newn = &((*newn)->rb_left);
+               else if (rc > 0)
+                       newn = &((*newn)->rb_right);
+               else {
+                       mutex_unlock(&rgd->rd_mutex);
+                       return cur;
+               }
+       }
+       mutex_unlock(&rgd->rd_mutex);
+       return NULL;
+}
+
+static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
+{
+       return (bi->bi_start * GFS2_NBBY) + blk;
+}
+
+/**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *       a block in a given allocation state.
  * @buf: the buffer that holds the bitmaps
@@ -435,8 +492,66 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        return error;
 }
 
+static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+{
+       gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u i:%u f:%u\n",
+                      rs->rs_rgd->rd_addr, rs->rs_start, rs->rs_biblk,
+                      rs->rs_blks, rs->rs_free);
+}
+
+/**
+ * gfs2_rs_treedel - remove a multi-block reservation from the rgd tree
+ * @ip: The inode for this reservation
+ * @lock: we need to lock the rgrp
+ *
+ */
+void gfs2_rs_treedel(struct gfs2_blkreserv *rs, bool lock)
+{
+       struct gfs2_rgrpd *rgd;
+
+       if (!gfs2_rs_active(rs))
+               return;
+
+       rgd = rs->rs_rgd;
+       /* We can't do this: The reason is that when the rgrp is invalidated,
+          it's in the "middle" of acquiring the glock, but the HOLDER bit
+          isn't set yet:
+          BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
+       trace_gfs2_rs(rs, 0, TRACE_RS_TREEDEL);
+
+       if (lock)
+               mutex_lock(&rgd->rd_mutex);
+       /* Now that we have the mutex, check again if the tree is empty just
+          in case someone else deleted this entry. */
+       if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
+               rb_erase(&rs->rs_node, &rgd->rd_rstree);
+       BUG_ON(!rgd->rd_rs_cnt);
+       rgd->rd_rs_cnt--;
+
+       if (rs->rs_free) {
+               /* return reserved blocks to the rgrp and the ip */
+               BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
+               rs->rs_rgd->rd_reserved -= rs->rs_free;
+               rs->rs_free = 0;
+               clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+               smp_mb__after_clear_bit();
+       }
+       if (lock)
+               mutex_unlock(&rgd->rd_mutex);
+       /* We can't change any of the step 1 or step 2 components of the rs.
+          E.g. We can't set rs_rgd to NULL because the rgd glock is held and
+          dequeued through this pointer.
+          Can't: atomic_set(&rs->rs_sizehint, 0);
+          Can't: rs->rs_requested = 0;
+          Can't: rs->rs_rgd = NULL;*/
+       rs->rs_bi = NULL;
+       rs->rs_start = 0;
+       rs->rs_biblk = 0;
+       rs->rs_blks = 0;
+}
+
 /**
- * gfs2_rs_delete - delete a reservation
+ * gfs2_rs_delete - delete a multi-block reservation
  * @ip: The inode for this reservation
  *
  */
@@ -444,12 +559,37 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
 {
        down_write(&ip->i_rw_mutex);
        if (ip->i_res) {
+               if (gfs2_rs_active(ip->i_res))
+                       gfs2_rs_treedel(ip->i_res, true);
+               trace_gfs2_rs(ip->i_res, ip->i_res->rs_start, TRACE_RS_DELETE);
+               BUG_ON(ip->i_res->rs_free);
                kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
                ip->i_res = NULL;
        }
        up_write(&ip->i_rw_mutex);
 }
 
+/**
+ * return_all_reservations - return all reserved blocks back to the rgrp.
+ * @rgd: the rgrp that needs its space back
+ *
+ * We previously reserved a bunch of blocks for allocation. Now we need to
+ * give them back. This leave the reservation structures in tact, but removes
+ * all of their corresponding "no-fly zones".
+ */
+static void return_all_reservations(struct gfs2_rgrpd *rgd)
+{
+       struct rb_node *n;
+       struct gfs2_blkreserv *rs;
+
+       mutex_lock(&rgd->rd_mutex);
+       while ((n = rb_first(&rgd->rd_rstree))) {
+               rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+               gfs2_rs_treedel(rs, false);
+       }
+       mutex_unlock(&rgd->rd_mutex);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
        struct rb_node *n;
@@ -472,6 +612,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
+               return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
 }
@@ -649,6 +790,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+       mutex_init(&rgd->rd_mutex);
 
        error = compute_bitstructs(rgd);
        if (error)
@@ -1115,29 +1257,228 @@ out:
 }
 
 /**
+ * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
+ * @bi: the bitmap with the blocks
+ * @ip: the inode structure
+ * @biblk: the 32-bit block number relative to the start of the bitmap
+ * @amount: the number of blocks to reserve
+ *
+ * Returns: NULL - reservation was already taken, so not inserted
+ *          pointer to the inserted reservation
+ */
+static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
+                                      struct gfs2_inode *ip, u32 biblk,
+                                      int amount)
+{
+       struct rb_node **newn, *parent = NULL;
+       int rc;
+       struct gfs2_blkreserv *rs = ip->i_res;
+       struct gfs2_rgrpd *rgd = rs->rs_rgd;
+       u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+
+       mutex_lock(&rgd->rd_mutex);
+       newn = &rgd->rd_rstree.rb_node;
+       BUG_ON(!ip->i_res);
+       BUG_ON(gfs2_rs_active(rs));
+       /* Figure out where to put new node */
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+       while (*newn) {
+               struct gfs2_blkreserv *cur =
+                       rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+
+               parent = *newn;
+               rc = rs_cmp(fsblock, amount, cur);
+               if (rc > 0)
+                       newn = &((*newn)->rb_right);
+               else if (rc < 0)
+                       newn = &((*newn)->rb_left);
+               else {
+                       mutex_unlock(&rgd->rd_mutex);
+                       return NULL; /* reservation already in use */
+               }
+       }
+
+       /* Do our reservation work */
+       rs = ip->i_res;
+       rs->rs_start = fsblock;
+       rs->rs_blks = amount;
+       rs->rs_free = amount;
+       rs->rs_biblk = biblk;
+       rs->rs_bi = bi;
+       rs->rs_inum = ip->i_no_addr;
+       rb_link_node(&rs->rs_node, parent, newn);
+       rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+
+       /* Do our inode accounting for the reservation */
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+
+       /* Do our rgrp accounting for the reservation */
+       rgd->rd_reserved += amount; /* blocks reserved */
+       rgd->rd_next_rsrv = rs->rs_start + rs->rs_blks;
+       rgd->rd_rs_cnt++; /* number of in-tree reservations */
+       if (!rgrp_contains_block(rgd, rgd->rd_next_rsrv))
+               rgd->rd_next_rsrv = 0;
+       mutex_unlock(&rgd->rd_mutex);
+       trace_gfs2_rs(rs, fsblock, TRACE_RS_INSERT);
+       return rs;
+}
+
+/**
+ * unclaimed_blocks - return number of blocks that aren't spoken for
+ */
+static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
+{
+       return rgd->rd_free_clone - rgd->rd_reserved;
+}
+
+/**
+ * rg_mblk_search - find a group of multiple free blocks
+ * @rgd: the resource group descriptor
+ * @rs: the block reservation
+ * @ip: pointer to the inode for which we're reserving blocks
+ *
+ * This is very similar to rgblk_search, except we're looking for whole
+ * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
+ * on aligned dwords for speed's sake.
+ *
+ * Returns: 0 if successful or BFITNOENT if there isn't enough free space
+ */
+
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+{
+       struct gfs2_bitmap *bi = rgd->rd_bits;
+       const u32 length = rgd->rd_length;
+       u32 blk;
+       unsigned int buf, x, search_bytes;
+       u8 *buffer = NULL;
+       u8 *ptr, *end, *nonzero;
+       u32 goal, rsv_bytes;
+       struct gfs2_blkreserv *rs;
+       u32 best_rs_bytes, unclaimed;
+       int best_rs_blocks;
+
+       /* Find bitmap block that contains bits for goal block */
+       if (rgd->rd_next_rsrv) {
+               goal = rgd->rd_next_rsrv - rgd->rd_data0;
+               for (buf = 0; buf < length; buf++) {
+                       bi = rgd->rd_bits + buf;
+                       /* Convert scope of "goal" from rgrp-wide to within
+                          found bit block */
+                       if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+                               goal -= bi->bi_start * GFS2_NBBY;
+                               goto do_search;
+                       }
+               }
+       }
+       buf = 0;
+       goal = 0;
+
+do_search:
+       best_rs_blocks = max(atomic_read(&ip->i_res->rs_sizehint),
+                            (int)(RGRP_RSRV_MINBLKS * rgd->rd_length));
+       best_rs_bytes = (best_rs_blocks *
+                        (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
+               GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
+       best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
+       unclaimed = unclaimed_blocks(rgd);
+       if (best_rs_bytes * GFS2_NBBY > unclaimed)
+               best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
+
+       for (x = 0; x <= length; x++) {
+               bi = rgd->rd_bits + buf;
+
+               if (test_bit(GBF_FULL, &bi->bi_flags))
+                       goto skip;
+
+               WARN_ON(!buffer_uptodate(bi->bi_bh));
+               if (bi->bi_clone)
+                       buffer = bi->bi_clone + bi->bi_offset;
+               else
+                       buffer = bi->bi_bh->b_data + bi->bi_offset;
+
+               /* We have to keep the reservations aligned on u64 boundaries
+                  otherwise we could get situations where a byte can't be
+                  used because it's after a reservation, but a free bit still
+                  is within the reservation's area. */
+               ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
+               end = (buffer + bi->bi_len);
+               while (ptr < end) {
+                       rsv_bytes = 0;
+                       if ((ptr + best_rs_bytes) <= end)
+                               search_bytes = best_rs_bytes;
+                       else
+                               search_bytes = end - ptr;
+                       BUG_ON(!search_bytes);
+                       nonzero = memchr_inv(ptr, 0, search_bytes);
+                       /* If the lot is all zeroes, reserve the whole size. If
+                          there's enough zeroes to satisfy the request, use
+                          what we can. If there's not enough, keep looking. */
+                       if (nonzero == NULL)
+                               rsv_bytes = search_bytes;
+                       else if ((nonzero - ptr) * GFS2_NBBY >=
+                                ip->i_res->rs_requested)
+                               rsv_bytes = (nonzero - ptr);
+
+                       if (rsv_bytes) {
+                               blk = ((ptr - buffer) * GFS2_NBBY);
+                               BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
+                               rs = rs_insert(bi, ip, blk,
+                                              rsv_bytes * GFS2_NBBY);
+                               if (IS_ERR(rs))
+                                       return PTR_ERR(rs);
+                               if (rs)
+                                       return 0;
+                       }
+                       ptr += ALIGN(search_bytes, sizeof(u64));
+               }
+skip:
+               /* Try next bitmap block (wrap back to rgrp header
+                  if at end) */
+               buf++;
+               buf %= length;
+               goal = 0;
+       }
+
+       return BFITNOENT;
+}
+
+/**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
  * @ip: the inode
  *
  * If there's room for the requested blocks to be allocated from the RG:
+ * This will try to get a multi-block reservation first, and if that doesn't
+ * fit, it will take what it can.
  *
  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
-static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode 
*ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
        const struct gfs2_blkreserv *rs = ip->i_res;
 
        if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
                return 0;
-       if (rgd->rd_free_clone >= rs->rs_requested)
+       /* Look for a multi-block reservation. We only do this for files,
+          not directories or other types. The problem with directories is
+          that items are often created and deleted in the directory in the
+          same breath, which can create "holes". In other words, we could get
+          into situations where two or more blocks are reserved, then used,
+          then one or more of the earlier blocks is freed. When we delete the
+          reservation, the rs_free will be off due to the hole, so the rgrp's
+          rg_free count can get off. Other non-files, like symlinks and
+          char or block devices, are very small in size, so it doesn't make
+          sense to reserve multiple blocks for them. */
+       if (S_ISREG(ip->i_inode.i_mode) &&
+           unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS) {
+               if (rg_mblk_search(rgd, ip) != BFITNOENT)
+                       return 1;
+       }
+       if (unclaimed_blocks(rgd) >= rs->rs_requested)
                return 1;
-       return 0;
-}
 
-static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
-{
-       return (bi->bi_start * GFS2_NBBY) + blk;
+       return 0;
 }
 
 /**
@@ -1217,7 +1558,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 
*last_unlinked, u64 skip
 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_rgrpd *rgd, *begin = NULL;
+       struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = ip->i_res;
        int error = 0, rg_locked, flags = LM_FLAG_TRY;
        u64 last_unlinked = NO_BLOCK;
@@ -1225,32 +1566,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 
requested)
 
        if (sdp->sd_args.ar_rgrplvb)
                flags |= GL_SKIP;
-       rs = ip->i_res;
        rs->rs_requested = requested;
        if (gfs2_assert_warn(sdp, requested)) {
                error = -EINVAL;
                goto out;
        }
-
-       if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
-               rgd = begin = ip->i_rgd;
-       else
-               rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
-
-       if (rgd == NULL)
+       if (gfs2_rs_active(rs)) {
+               begin = rs->rs_rgd;
+               flags = 0; /* Yoda: Do or do not. There is no try */
+       } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+               rs->rs_rgd = begin = ip->i_rgd;
+       } else {
+               rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+       }
+       if (rs->rs_rgd == NULL)
                return -EBADSLT;
 
        while (loops < 3) {
                rg_locked = 0;
 
-               if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
+               if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
                        rg_locked = 1;
                        error = 0;
+               } else if (!loops && !gfs2_rs_active(rs) &&
+                          rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+                       /* If the rgrp already is maxed out for contenders,
+                          we can eliminate it as a "first pass" without even
+                          requesting the rgrp glock. */
+                       error = GLR_TRYFAILED;
                } else {
-                       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-                                                  flags, &rs->rs_rgd_gh);
+                       error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+                                                  LM_ST_EXCLUSIVE, flags,
+                                                  &rs->rs_rgd_gh);
                        if (!error && sdp->sd_args.ar_rgrplvb) {
-                               error = update_rgrp_lvb(rgd);
+                               error = update_rgrp_lvb(rs->rs_rgd);
                                if (error) {
                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                                        return error;
@@ -1259,24 +1608,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 
requested)
                }
                switch (error) {
                case 0:
-                       if (try_rgrp_fit(rgd, ip)) {
+                       if (gfs2_rs_active(rs)) {
+                               if (unclaimed_blocks(rs->rs_rgd) +
+                                   rs->rs_free >= rs->rs_requested) {
+                                       ip->i_rgd = rs->rs_rgd;
+                                       return 0;
+                               }
+                               /* We have a multi-block reservation, but the
+                                  rgrp doesn't have enough free blocks to
+                                  satisfy the request. Free the reservation
+                                  and look for a suitable rgrp. */
+                               gfs2_rs_treedel(rs, true);
+                       }
+                       if (try_rgrp_fit(rs->rs_rgd, ip)) {
                                if (sdp->sd_args.ar_rgrplvb)
-                                       gfs2_rgrp_bh_get(rgd);
-                               ip->i_rgd = rgd;
+                                       gfs2_rgrp_bh_get(rs->rs_rgd);
+                               ip->i_rgd = rs->rs_rgd;
                                return 0;
                        }
-                       if (rgd->rd_flags & GFS2_RDF_CHECK) {
+                       if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
                                if (sdp->sd_args.ar_rgrplvb)
-                                       gfs2_rgrp_bh_get(rgd);
-                               try_rgrp_unlink(rgd, &last_unlinked,
+                                       gfs2_rgrp_bh_get(rs->rs_rgd);
+                               try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
                                                ip->i_no_addr);
                        }
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                        /* fall through */
                case GLR_TRYFAILED:
-                       rgd = gfs2_rgrpd_get_next(rgd);
-                       if (rgd != begin) /* If we didn't wrap */
+                       rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
+                       if (rs->rs_rgd != begin) /* If we didn't wrap */
                                break;
 
                        flags &= ~LM_FLAG_TRY;
@@ -1314,6 +1675,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
        struct gfs2_blkreserv *rs = ip->i_res;
 
+       if (!rs)
+               return;
+
+       if (gfs2_rs_active(rs) && !rs->rs_free)
+               gfs2_rs_treedel(rs, true);
+
        if (rs->rs_rgd_gh.gh_gl)
                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
        rs->rs_requested = 0;
@@ -1412,7 +1779,27 @@ do_search:
                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
                        buffer = bi->bi_clone + bi->bi_offset;
 
-               biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+               while (1) {
+                       struct gfs2_blkreserv *rs;
+                       u32 rgblk;
+
+                       biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+                       if (biblk == BFITNOENT)
+                               break;
+                       /* Check if this block is reserved() */
+                       rgblk = gfs2_bi2rgd_blk(bi, biblk);
+                       rs = rs_find(rgd, rgblk);
+                       if (rs == NULL)
+                               break;
+
+                       BUG_ON(rs->rs_bi != bi);
+                       biblk = BFITNOENT;
+                       /* This should jump to the first block after the
+                          reservation. */
+                       goal = rs->rs_biblk + rs->rs_free;
+                       if (goal >= bi->bi_len * GFS2_NBBY)
+                               break;
+               }
                if (biblk != BFITNOENT)
                        break;
 
@@ -1448,8 +1835,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, 
struct gfs2_bitmap *bi,
                             u32 blk, bool dinode, unsigned int *n)
 {
        const unsigned int elen = *n;
-       u32 goal;
+       u32 goal, rgblk;
        const u8 *buffer = NULL;
+       struct gfs2_blkreserv *rs;
 
        *n = 0;
        buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1462,6 +1850,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, 
struct gfs2_bitmap *bi,
                goal++;
                if (goal >= (bi->bi_len * GFS2_NBBY))
                        break;
+               rgblk = gfs2_bi2rgd_blk(bi, goal);
+               rs = rs_find(rgd, rgblk);
+               if (rs) /* Oops, we bumped into someone's reservation */
+                       break;
                if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
                    GFS2_BLKST_FREE)
                        break;
@@ -1538,11 +1930,19 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd 
*sdp, u64 bstart,
 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
        const struct gfs2_rgrpd *rgd = gl->gl_object;
+       struct gfs2_blkreserv *trs;
+       const struct rb_node *n;
+
        if (rgd == NULL)
                return 0;
-       gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+       gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-                      rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+                      rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+                      rgd->rd_reserved);
+       for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+               trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+               dump_rs(seq, trs);
+       }
        return 0;
 }
 
@@ -1557,10 +1957,74 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
 
 /**
+ * claim_reserved_blks - Claim previously reserved blocks
+ * @ip: the inode that's claiming the reservation
+ * @dinode: 1 if this block is a dinode block, otherwise data block
+ * @nblocks: desired extent length
+ *
+ * Lay claim to previously allocated block reservation blocks.
+ * Returns: Starting block number of the blocks claimed.
+ * Sets *nblocks to the actual extent length allocated.
+ */
+static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
+                              unsigned int *nblocks)
+{
+       struct gfs2_blkreserv *rs = ip->i_res;
+       struct gfs2_rgrpd *rgd = rs->rs_rgd;
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       struct gfs2_bitmap *bi;
+       u32 biblk;
+       u64 start_block = 0, fsblock;
+       const unsigned int elen = *nblocks;
+
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+       gfs2_assert_withdraw(sdp, rgd);
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+       bi = rs->rs_bi;
+       gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+
+       for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
+               biblk = rs->rs_biblk;
+
+               fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+               /* Check if the next free block is not contiguous with
+                  the others in the extent. */
+               if (start_block && fsblock != start_block + (*nblocks))
+                       break;
+
+               rs->rs_biblk++;
+               rs->rs_free--;
+               /* Make sure the bitmap hasn't changed */
+               gfs2_setbit(rgd, bi->bi_clone, bi, biblk, GFS2_BLKST_USED);
+
+               BUG_ON(!rgd->rd_reserved);
+               rgd->rd_reserved--;
+               if (!start_block) {
+                       start_block = fsblock;
+                       dinode = false;
+               }
+               trace_gfs2_rs(rs, fsblock, TRACE_RS_CLAIM);
+       }
+
+       if (!rs->rs_free) {
+               struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
+
+               gfs2_rs_treedel(rs, true);
+               /* -nblocks because we haven't returned to do the math yet.
+                  I'm doing the math backwards to prevent negative numbers,
+                  but think of it as:
+                  if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
+               if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
+                       rg_mblk_search(rgd, ip);
+       }
+       return start_block;
+}
+
+/**
  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
  * @ip: the inode to allocate the block for
  * @bn: Used to return the starting block number
- * @ndata: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
  * @dinode: 1 if we're allocating a dinode block, else 0
  * @generation: the generation number of the inode
  *
@@ -1585,20 +2049,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
        if (ip->i_res->rs_requested == 0)
                return -ECANCELED;
 
-       rgd = ip->i_rgd;
-
-       if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-               goal = ip->i_goal - rgd->rd_data0;
-       else
-               goal = rgd->rd_last_alloc;
-
-       blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+       /* Check if we have a multi-block reservation, and if so, claim the
+          next free block from it. */
+       if (gfs2_rs_active(ip->i_res)) {
+               BUG_ON(!ip->i_res->rs_free);
+               rgd = ip->i_res->rs_rgd;
+               block = claim_reserved_blks(ip, dinode, nblocks);
+       } else {
+               rgd = ip->i_rgd;
 
-       /* Since all blocks are reserved in advance, this shouldn't happen */
-       if (blk == BFITNOENT)
-               goto rgrp_error;
+               if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
+                       goal = ip->i_goal - rgd->rd_data0;
+               else
+                       goal = rgd->rd_last_alloc;
+
+               blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+
+               /* Since all blocks are reserved in advance, this shouldn't
+                  happen */
+               if (blk == BFITNOENT) {
+                       printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
+                              *nblocks);
+                       printk(KERN_WARNING "FULL=%d\n",
+                              test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+                       goto rgrp_error;
+               }
 
-       block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+               block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+       }
        ndata = *nblocks;
        if (dinode)
                ndata--;
@@ -1615,8 +2093,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
                        brelse(dibh);
                }
        }
-       if (rgd->rd_free < *nblocks)
+       if (rgd->rd_free < *nblocks) {
+               printk(KERN_WARNING "nblocks=%u\n", *nblocks);
                goto rgrp_error;
+       }
 
        rgd->rd_free -= *nblocks;
        if (dinode) {
@@ -1672,6 +2152,9 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 
bstart, u32 blen, int meta)
                return;
        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
        rgd->rd_free += blen;
+       if (bstart < rgd->rd_next_rsrv)
+               rgd->rd_next_rsrv = bstart;
+
        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8314d..b604009 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -13,6 +13,14 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+/* Since each block in the file system is represented by two bits in the
+ * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
+ * By reserving 32 blocks at a time, we can optimize / shortcut how we search
+ * through the bitmaps by looking a word at a time.
+ */
+#define RGRP_RSRV_MINBYTES 8
+#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
@@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
@@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *n,
                             bool dinode, u64 *generation);
 
 extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_treedel(struct gfs2_blkreserv *rs, bool lock);
 extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, 
int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 
offset,
                                   const struct gfs2_bitmap *bi, unsigned 
minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
-/* This is how to tell if a reservation is "inplace" reserved: */
+/* This is how to tell if a multi-block reservation is "inplace" reserved: */
 static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
 {
        if (ip->i_res && ip->i_res->rs_requested)
@@ -70,4 +81,12 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
        return 0;
 }
 
+/* This is how to tell if a multi-block reservation is in the rgrp tree: */
+static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+{
+       if (rs && rs->rs_start)
+               return 1;
+       return 0;
+}
+
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 7880687..433da86 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                return -EIO;
        }
 
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
+
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
                return error;
@@ -1550,6 +1554,9 @@ out_truncate:
 
 out_unlock:
        /* Error path for case 1 */
+       if (gfs2_rs_active(ip->i_res))
+               gfs2_rs_treedel(ip->i_res, true);
+
        if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
                gfs2_glock_dq(&ip->i_iopen_gh);
        gfs2_holder_uninit(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 1b8b815..134bb9d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -31,6 +31,17 @@
                            { GFS2_BLKST_DINODE, "dinode" },    \
                            { GFS2_BLKST_UNLINKED, "unlinked" })
 
+#define TRACE_RS_DELETE  0
+#define TRACE_RS_TREEDEL 1
+#define TRACE_RS_INSERT  2
+#define TRACE_RS_CLAIM   3
+
+#define rs_func_name(x) __print_symbolic(x,    \
+                                        { 0, "del " }, \
+                                        { 1, "tdel" }, \
+                                        { 2, "ins " }, \
+                                        { 3, "clm " })
+
 #define show_glock_flags(flags) __print_flags(flags, "",       \
        {(1UL << GLF_LOCK),                     "l" },          \
        {(1UL << GLF_DEMOTE),                   "D" },          \
@@ -470,6 +481,7 @@ TRACE_EVENT(gfs2_block_alloc,
                __field(        u8,     block_state             )
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_reserved             )
        ),
 
        TP_fast_assign(
@@ -480,16 +492,63 @@ TRACE_EVENT(gfs2_block_alloc,
                __entry->block_state    = block_state;
                __entry->rd_addr        = rgd->rd_addr;
                __entry->rd_free_clone  = rgd->rd_free_clone;
+               __entry->rd_reserved    = rgd->rd_reserved;
        ),
 
-       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
+       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long)__entry->len,
                  block_state_name(__entry->block_state),
                  (unsigned long long)__entry->rd_addr,
-                 __entry->rd_free_clone)
+                 __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+);
+
+/* Keep track of multi-block reservations as they are allocated/freed */
+TRACE_EVENT(gfs2_rs,
+
+       TP_PROTO(const struct gfs2_blkreserv *rs, u64 claimed, u8 func),
+
+       TP_ARGS(rs, claimed, func),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        u64,    rd_addr                 )
+               __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_reserved             )
+               __field(        u64,    inum                    )
+               __field(        u64,    start                   )
+               __field(        u32,    len                     )
+               __field(        u64,    claimed                 )
+               __field(        u32,    free                    )
+               __field(        u8,     func                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = rs->rs_rgd ? 
rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
+               __entry->rd_addr        = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
+               __entry->rd_free_clone  = rs->rs_rgd ? 
rs->rs_rgd->rd_free_clone : 0;
+               __entry->rd_reserved    = rs->rs_rgd ? rs->rs_rgd->rd_reserved 
: 0;
+               __entry->inum           = rs->rs_inum;
+               __entry->start          = rs->rs_start;
+               __entry->len            = rs->rs_blks;
+               __entry->claimed        = claimed;
+               __entry->free           = rs->rs_free;
+               __entry->func           = func;
+       ),
+
+       TP_printk("%u,%u bmap %llu resrv %llu/%lu rg:%llu rf:%lu rr:%lu %s "
+                 "c:%llu f:%lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->inum,
+                 (unsigned long long)__entry->start,
+                 (unsigned long)__entry->len,
+                 (unsigned long long)__entry->rd_addr,
+                 (unsigned long)__entry->rd_free_clone,
+                 (unsigned long)__entry->rd_reserved,
+                 rs_func_name(__entry->func),
+                 __entry->claimed, (unsigned long)__entry->free)
 );
 
 #endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 523c0de..27a0b4a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, 
struct buffer_head *bh,
 {
        int error;
 
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
+
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
                goto out_alloc;
@@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct 
gfs2_ea_request *er,
        struct buffer_head *dibh;
        int error;
 
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
+
        error = gfs2_quota_lock_check(ip);
        if (error)
                return error;
@@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
        int error;
 
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
+
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
                return error;

Reply via email to