Hi,

With a slight tweak to take account of Abhi's patch, this is now in the -nmw tree. A really good milestone to have reached. We should now take the time to give this a really good test, and make sure that there are no other corner cases lurking anywhere,
Steve. On Wed, 2012-07-18 at 12:26 -0400, Bob Peterson wrote: > Hi, > > This is my fifth attempt at posting my file defragmentation patch > for GFS2. This patch incorporates many of the things suggested by > Steve Whitehouse in past emails. Notes: > > 1. The reservations now keep track of starting point and free space, > and the starting point advances as blocks are claimed. This eliminates > a couple variables from the reservations structure and makes it > shorter, and the code smaller. > 2. I'm using a spin_lock now to protect the reservation tree rather > than the mutex. > 3. I'm not trying to implement an Orlov allocator for the directories > for this iteration. I'm now keeping the inode blocks closer together > by having the inode create function grab an allocation on behalf of > the new inode, using the parent directory. > > Description: > > This patch reduces GFS2 file fragmentation by pre-reserving blocks. > > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson <rpete...@redhat.com> > --- > fs/gfs2/bmap.c | 3 + > fs/gfs2/file.c | 24 +-- > fs/gfs2/incore.h | 49 ++++- > fs/gfs2/inode.c | 37 +++- > fs/gfs2/rgrp.c | 576 > +++++++++++++++++++++++++++++++++++++++++++++----- > fs/gfs2/rgrp.h | 31 +++- > fs/gfs2/super.c | 7 + > fs/gfs2/trace_gfs2.h | 59 +++++- > fs/gfs2/xattr.c | 12 + > 9 files changed, 707 insertions(+), 91 deletions(-) > > diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c > index 6d957a8..49cd7dd 100644 > --- a/fs/gfs2/bmap.c > +++ b/fs/gfs2/bmap.c > @@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct > buffer_head *dibh, > if (error) > goto out_rlist; > > + if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock > held */ > + gfs2_rs_deltree(ip->i_res); > + > error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + > RES_INDIRECT + RES_STATFS + RES_QUOTA, > revokes); > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 6fbf3cb..9f94832 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, > struct vm_fault *vmf) > if (ret) > return ret; > > + atomic_set(&ip->i_res->rs_sizehint, > + PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize); > + > gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); > ret = gfs2_glock_nq(&gh); > if (ret) > @@ -571,22 +574,15 @@ fail: > > static int gfs2_release(struct inode *inode, struct file *file) > { > - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; > - struct gfs2_file *fp; > struct gfs2_inode *ip = GFS2_I(inode); > > - fp = file->private_data; > + kfree(file->private_data); > file->private_data = NULL; > > - if ((file->f_mode & FMODE_WRITE) && ip->i_res && > + if ((file->f_mode & FMODE_WRITE) && > (atomic_read(&inode->i_writecount) == 1)) > gfs2_rs_delete(ip); > > - if (gfs2_assert_warn(sdp, fp)) > - return -EIO; > - > - kfree(fp); > - > return 0; > } > > @@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, > const struct iovec *iov, > unsigned long nr_segs, loff_t pos) > { > struct file *file = iocb->ki_filp; > + size_t writesize = iov_length(iov, nr_segs); > struct dentry *dentry = file->f_dentry; > struct gfs2_inode *ip = GFS2_I(dentry->d_inode); > + struct gfs2_sbd *sdp; > int ret; > > + sdp = GFS2_SB(file->f_mapping->host); > ret = gfs2_rs_alloc(ip); > if (ret) > return ret; > > + atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize); > if (file->f_flags & O_APPEND) { > struct gfs2_holder gh; > > @@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, > 
loff_t offset, > if (unlikely(error)) > goto out_uninit; > > + atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize); > + > while (len > 0) { > if (len < bytes) > bytes = len; > @@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, > loff_t offset, > offset += bytes; > continue; > } > - error = gfs2_rindex_update(sdp); > - if (error) > - goto out_unlock; > - > error = gfs2_quota_lock_check(ip); > if (error) > goto out_unlock; > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index dc73070..aaecc80 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -84,6 +84,7 @@ struct gfs2_rgrpd { > u32 rd_data; /* num of data blocks in rgrp */ > u32 rd_bitbytes; /* number of bytes in data bitmaps */ > u32 rd_free; > + u32 rd_reserved; /* number of blocks reserved */ > u32 rd_free_clone; > u32 rd_dinodes; > u64 rd_igeneration; > @@ -96,6 +97,9 @@ struct gfs2_rgrpd { > #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ > #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ > #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ > + spinlock_t rd_rsspin; /* protects reservation related vars */ > + struct rb_root rd_rstree; /* multi-block reservation tree */ > + u32 rd_rs_cnt; /* count of current reservations */ > }; > > enum gfs2_state_bits { > @@ -233,6 +237,38 @@ struct gfs2_holder { > unsigned long gh_ip; > }; > > +/* Resource group multi-block reservation, in order of appearance: > + > + Step 1. Function prepares to write, allocates a mb, sets the size hint. > + Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info > + Step 3. Function get_local_rgrp locks the rgrp, determines which bits to > use > + Step 4. Bits are assigned from the rgrp based on either the reservation > + or wherever it can. > +*/ > + > +struct gfs2_blkreserv { > + /* components used during write (step 1): */ > + atomic_t rs_sizehint; /* hint of the write size */ > + > + /* components used during inplace_reserve (step 2): */ > + u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ > + > + /* components used during get_local_rgrp (step 3): */ > + struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ > + struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ > + struct rb_node rs_node; /* link to other block reservations */ > + > + /* components used during block searches and assignments (step 4): */ > + struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ > + u32 rs_biblk; /* start block relative to the bi */ > + u32 rs_free; /* how many blocks are still free */ > + > + /* ancillary quota stuff */ > + struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; > + struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; > + unsigned int rs_qa_qd_num; > +}; > + > enum { > GLF_LOCK = 1, > GLF_DEMOTE = 3, > @@ -290,16 +326,6 @@ struct gfs2_glock { > > #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ > > -struct gfs2_blkreserv { > - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ > - struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */ > - > - /* ancillary quota stuff */ > - struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; > - struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; > - unsigned int rs_qa_qd_num; > -}; > - > enum { > GIF_INVALID = 0, > GIF_QD_LOCKED = 1, > @@ -307,7 +333,6 @@ enum { > GIF_SW_PAGED = 3, > }; > > - > struct gfs2_inode { > struct inode i_inode; > u64 i_no_addr; > @@ -318,7 +343,7 @@ struct gfs2_inode { > struct gfs2_glock *i_gl; /* Move into i_gh? 
*/ > struct gfs2_holder i_iopen_gh; > struct gfs2_holder i_gh; /* for prepare/commit_write only */ > - struct gfs2_blkreserv *i_res; /* resource group block reservation */ > + struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */ > struct gfs2_rgrpd *i_rgd; > u64 i_goal; /* goal block for allocations */ > struct rw_semaphore i_rw_mutex; > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c > index 2b035e0..c53c67e 100644 > --- a/fs/gfs2/inode.c > +++ b/fs/gfs2/inode.c > @@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct > gfs2_glock *gl, > int error; > > munge_mode_uid_gid(dip, &mode, &uid, &gid); > + error = gfs2_rindex_update(sdp); > + if (error) > + return error; > > error = gfs2_quota_lock(dip, uid, gid); > if (error) > @@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const > struct qstr *name, > struct buffer_head *dibh; > int error; > > + error = gfs2_rindex_update(sdp); > + if (error) > + return error; > + > error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); > if (error) > goto fail; > @@ -596,7 +603,8 @@ fail_end_trans: > gfs2_trans_end(sdp); > > fail_ipreserv: > - gfs2_inplace_release(dip); > + if (alloc_required) > + gfs2_inplace_release(dip); > > fail_quota_locks: > gfs2_quota_unlock(dip); > @@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct > dentry *dentry, > const struct qstr *name = &dentry->d_name; > struct gfs2_holder ghs[2]; > struct inode *inode = NULL; > - struct gfs2_inode *dip = GFS2_I(dir); > + struct gfs2_inode *dip = GFS2_I(dir), *ip; > struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); > struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; > int error; > @@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct > dentry *dentry, > if (!name->len || name->len > GFS2_FNAMESIZE) > return -ENAMETOOLONG; > > + /* We need a reservation to allocate the new dinode block. The > + directory ip temporarily points to the reservation, but this is > + being done to get a set of contiguous blocks for the new dinode. > + Since this is a create, we don't have a sizehint yet, so it will > + have to use the minimum reservation size. */ > error = gfs2_rs_alloc(dip); > if (error) > return error; > @@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct > dentry *dentry, > if (IS_ERR(inode)) > goto fail_gunlock2; > > - error = gfs2_inode_refresh(GFS2_I(inode)); > + ip = GFS2_I(inode); > + error = gfs2_inode_refresh(ip); > if (error) > goto fail_gunlock2; > > - /* the new inode needs a reservation so it can allocate xattrs. */ > - error = gfs2_rs_alloc(GFS2_I(inode)); > - if (error) > - goto fail_gunlock2; > + /* The newly created inode needs a reservation so it can allocate > + xattrs. At the same time, we want new blocks allocated to the new > + dinode to be as contiguous as possible. Since we allocated the > + dinode block under the directory's reservation, we transfer > + ownership of that reservation to the new inode. The directory > + doesn't need a reservation unless it needs a new allocation. 
*/ > + ip->i_res = dip->i_res; > + dip->i_res = NULL; > > error = gfs2_acl_create(dip, inode); > if (error) > goto fail_gunlock2; > > - error = gfs2_security_init(dip, GFS2_I(inode), name); > + error = gfs2_security_init(dip, ip, name); > if (error) > goto fail_gunlock2; > > - error = link_dinode(dip, name, GFS2_I(inode)); > + error = link_dinode(dip, name, ip); > if (error) > goto fail_gunlock2; > > @@ -738,6 +756,7 @@ fail_gunlock: > iput(inode); > } > fail: > + gfs2_rs_delete(dip); > if (bh) > brelse(bh); > return error; > diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c > index e53d0a1..ad37fe2 100644 > --- a/fs/gfs2/rgrp.c > +++ b/fs/gfs2/rgrp.c > @@ -35,6 +35,9 @@ > #define BFITNOENT ((u32)~0) > #define NO_BLOCK ((u64)~0) > > +#define RSRV_CONTENTION_FACTOR 4 > +#define RGRP_RSRV_MAX_CONTENDERS 2 > + > #if BITS_PER_LONG == 32 > #define LBITMASK (0x55555555UL) > #define LBITSKIP55 (0x55555555UL) > @@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 > mask, u8 state) > } > > /** > + * rs_cmp - multi-block reservation range compare > + * @blk: absolute file system block number of the new reservation > + * @len: number of blocks in the new reservation > + * @rs: existing reservation to compare against > + * > + * returns: 1 if the block range is beyond the reach of the reservation > + * -1 if the block range is before the start of the reservation > + * 0 if the block range overlaps with the reservation > + */ > +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) > +{ > + u64 startblk = gfs2_rs_startblk(rs); > + > + if (blk >= startblk + rs->rs_free) > + return 1; > + if (blk + len - 1 < startblk) > + return -1; > + return 0; > +} > + > +/** > + * rs_find - Find a rgrp multi-block reservation that contains a given block > + * @rgd: The rgrp > + * @rgblk: The block we're looking for, relative to the rgrp > + */ > +static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) > +{ > + struct rb_node **newn; > + int rc; > + u64 fsblk = rgblk + rgd->rd_data0; > + > + spin_lock(&rgd->rd_rsspin); > + newn = &rgd->rd_rstree.rb_node; > + while (*newn) { > + struct gfs2_blkreserv *cur = > + rb_entry(*newn, struct gfs2_blkreserv, rs_node); > + rc = rs_cmp(fsblk, 1, cur); > + if (rc < 0) > + newn = &((*newn)->rb_left); > + else if (rc > 0) > + newn = &((*newn)->rb_right); > + else { > + spin_unlock(&rgd->rd_rsspin); > + return cur; > + } > + } > + spin_unlock(&rgd->rd_rsspin); > + return NULL; > +} > + > +/** > * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair > representing > * a block in a given allocation state. 
> * @buf: the buffer that holds the bitmaps > @@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) > int gfs2_rs_alloc(struct gfs2_inode *ip) > { > int error = 0; > + struct gfs2_blkreserv *res; > + > + if (ip->i_res) > + return 0; > + > + res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); > + if (!res) > + error = -ENOMEM; > > down_write(&ip->i_rw_mutex); > - if (!ip->i_res) { > - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); > - if (!ip->i_res) > - error = -ENOMEM; > - } > + if (ip->i_res) > + kmem_cache_free(gfs2_rsrv_cachep, res); > + else > + ip->i_res = res; > up_write(&ip->i_rw_mutex); > return error; > } > > +static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) > +{ > + gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", > + rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, > + rs->rs_free); > +} > + > /** > - * gfs2_rs_delete - delete a reservation > + * __rs_deltree - remove a multi-block reservation from the rgd tree > + * @rs: The reservation to remove > + * > + */ > +static void __rs_deltree(struct gfs2_blkreserv *rs) > +{ > + struct gfs2_rgrpd *rgd; > + > + if (!gfs2_rs_active(rs)) > + return; > + > + rgd = rs->rs_rgd; > + /* We can't do this: The reason is that when the rgrp is invalidated, > + it's in the "middle" of acquiring the glock, but the HOLDER bit > + isn't set yet: > + BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ > + trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); > + > + if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) > + rb_erase(&rs->rs_node, &rgd->rd_rstree); > + BUG_ON(!rgd->rd_rs_cnt); > + rgd->rd_rs_cnt--; > + > + if (rs->rs_free) { > + /* return reserved blocks to the rgrp and the ip */ > + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); > + rs->rs_rgd->rd_reserved -= rs->rs_free; > + rs->rs_free = 0; > + clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); > + smp_mb__after_clear_bit(); > + } > + /* We can't change any of the step 1 or step 2 components of the rs. > + E.g. We can't set rs_rgd to NULL because the rgd glock is held and > + dequeued through this pointer. > + Can't: atomic_set(&rs->rs_sizehint, 0); > + Can't: rs->rs_requested = 0; > + Can't: rs->rs_rgd = NULL;*/ > + rs->rs_bi = NULL; > + rs->rs_biblk = 0; > +} > + > +/** > + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree > + * @rs: The reservation to remove > + * > + */ > +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) > +{ > + struct gfs2_rgrpd *rgd; > + > + if (!gfs2_rs_active(rs)) > + return; > + > + rgd = rs->rs_rgd; > + spin_lock(&rgd->rd_rsspin); > + __rs_deltree(rs); > + spin_unlock(&rgd->rd_rsspin); > +} > + > +/** > + * gfs2_rs_delete - delete a multi-block reservation > * @ip: The inode for this reservation > * > */ > @@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip) > { > down_write(&ip->i_rw_mutex); > if (ip->i_res) { > + gfs2_rs_deltree(ip->i_res); > + trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); > + BUG_ON(ip->i_res->rs_free); > kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); > ip->i_res = NULL; > } > up_write(&ip->i_rw_mutex); > } > > +/** > + * return_all_reservations - return all reserved blocks back to the rgrp. > + * @rgd: the rgrp that needs its space back > + * > + * We previously reserved a bunch of blocks for allocation. Now we need to > + * give them back. This leave the reservation structures in tact, but removes > + * all of their corresponding "no-fly zones". 
> + */ > +static void return_all_reservations(struct gfs2_rgrpd *rgd) > +{ > + struct rb_node *n; > + struct gfs2_blkreserv *rs; > + > + spin_lock(&rgd->rd_rsspin); > + while ((n = rb_first(&rgd->rd_rstree))) { > + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); > + __rs_deltree(rs); > + } > + spin_unlock(&rgd->rd_rsspin); > +} > + > void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) > { > struct rb_node *n; > @@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) > > gfs2_free_clones(rgd); > kfree(rgd->rd_bits); > + return_all_reservations(rgd); > kmem_cache_free(gfs2_rgrpd_cachep, rgd); > } > } > @@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) > rgd->rd_data0 = be64_to_cpu(buf.ri_data0); > rgd->rd_data = be32_to_cpu(buf.ri_data); > rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); > + spin_lock_init(&rgd->rd_rsspin); > > error = compute_bitstructs(rgd); > if (error) > @@ -1115,29 +1269,212 @@ out: > } > > /** > + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree > + * @bi: the bitmap with the blocks > + * @ip: the inode structure > + * @biblk: the 32-bit block number relative to the start of the bitmap > + * @amount: the number of blocks to reserve > + * > + * Returns: NULL - reservation was already taken, so not inserted > + * pointer to the inserted reservation > + */ > +static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, > + struct gfs2_inode *ip, u32 biblk, > + int amount) > +{ > + struct rb_node **newn, *parent = NULL; > + int rc; > + struct gfs2_blkreserv *rs = ip->i_res; > + struct gfs2_rgrpd *rgd = rs->rs_rgd; > + u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; > + > + spin_lock(&rgd->rd_rsspin); > + newn = &rgd->rd_rstree.rb_node; > + BUG_ON(!ip->i_res); > + BUG_ON(gfs2_rs_active(rs)); > + /* Figure out where to put new node */ > + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ > + while (*newn) { > + struct gfs2_blkreserv *cur = > + rb_entry(*newn, struct gfs2_blkreserv, rs_node); > + > + parent = *newn; > + rc = rs_cmp(fsblock, amount, cur); > + if (rc > 0) > + newn = &((*newn)->rb_right); > + else if (rc < 0) > + newn = &((*newn)->rb_left); > + else { > + spin_unlock(&rgd->rd_rsspin); > + return NULL; /* reservation already in use */ > + } > + } > + > + /* Do our reservation work */ > + rs = ip->i_res; > + rs->rs_free = amount; > + rs->rs_biblk = biblk; > + rs->rs_bi = bi; > + rb_link_node(&rs->rs_node, parent, newn); > + rb_insert_color(&rs->rs_node, &rgd->rd_rstree); > + > + /* Do our inode accounting for the reservation */ > + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ > + > + /* Do our rgrp accounting for the reservation */ > + rgd->rd_reserved += amount; /* blocks reserved */ > + rgd->rd_rs_cnt++; /* number of in-tree reservations */ > + spin_unlock(&rgd->rd_rsspin); > + trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); > + return rs; > +} > + > +/** > + * unclaimed_blocks - return number of blocks that aren't spoken for > + */ > +static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) > +{ > + return rgd->rd_free_clone - rgd->rd_reserved; > +} > + > +/** > + * rg_mblk_search - find a group of multiple free blocks > + * @rgd: the resource group descriptor > + * @rs: the block reservation > + * @ip: pointer to the inode for which we're reserving blocks > + * > + * This is very similar to rgblk_search, except we're looking for whole > + * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing > + * on aligned dwords for speed's sake. 
> + * > + * Returns: 0 if successful or BFITNOENT if there isn't enough free space > + */ > + > +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) > +{ > + struct gfs2_bitmap *bi = rgd->rd_bits; > + const u32 length = rgd->rd_length; > + u32 blk; > + unsigned int buf, x, search_bytes; > + u8 *buffer = NULL; > + u8 *ptr, *end, *nonzero; > + u32 goal, rsv_bytes; > + struct gfs2_blkreserv *rs; > + u32 best_rs_bytes, unclaimed; > + int best_rs_blocks; > + > + /* Find bitmap block that contains bits for goal block */ > + if (rgrp_contains_block(rgd, ip->i_goal)) > + goal = ip->i_goal - rgd->rd_data0; > + else > + goal = rgd->rd_last_alloc; > + for (buf = 0; buf < length; buf++) { > + bi = rgd->rd_bits + buf; > + /* Convert scope of "goal" from rgrp-wide to within > + found bit block */ > + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { > + goal -= bi->bi_start * GFS2_NBBY; > + goto do_search; > + } > + } > + buf = 0; > + goal = 0; > + > +do_search: > + best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), > + (RGRP_RSRV_MINBLKS * rgd->rd_length)); > + best_rs_bytes = (best_rs_blocks * > + (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / > + GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ > + best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); > + unclaimed = unclaimed_blocks(rgd); > + if (best_rs_bytes * GFS2_NBBY > unclaimed) > + best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; > + > + for (x = 0; x <= length; x++) { > + bi = rgd->rd_bits + buf; > + > + if (test_bit(GBF_FULL, &bi->bi_flags)) > + goto skip; > + > + WARN_ON(!buffer_uptodate(bi->bi_bh)); > + if (bi->bi_clone) > + buffer = bi->bi_clone + bi->bi_offset; > + else > + buffer = bi->bi_bh->b_data + bi->bi_offset; > + > + /* We have to keep the reservations aligned on u64 boundaries > + otherwise we could get situations where a byte can't be > + used because it's after a reservation, but a free bit still > + is within the reservation's area. */ > + ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); > + end = (buffer + bi->bi_len); > + while (ptr < end) { > + rsv_bytes = 0; > + if ((ptr + best_rs_bytes) <= end) > + search_bytes = best_rs_bytes; > + else > + search_bytes = end - ptr; > + BUG_ON(!search_bytes); > + nonzero = memchr_inv(ptr, 0, search_bytes); > + /* If the lot is all zeroes, reserve the whole size. If > + there's enough zeroes to satisfy the request, use > + what we can. If there's not enough, keep looking. */ > + if (nonzero == NULL) > + rsv_bytes = search_bytes; > + else if ((nonzero - ptr) * GFS2_NBBY >= > + ip->i_res->rs_requested) > + rsv_bytes = (nonzero - ptr); > + > + if (rsv_bytes) { > + blk = ((ptr - buffer) * GFS2_NBBY); > + BUG_ON(blk >= bi->bi_len * GFS2_NBBY); > + rs = rs_insert(bi, ip, blk, > + rsv_bytes * GFS2_NBBY); > + if (IS_ERR(rs)) > + return PTR_ERR(rs); > + if (rs) > + return 0; > + } > + ptr += ALIGN(search_bytes, sizeof(u64)); > + } > +skip: > + /* Try next bitmap block (wrap back to rgrp header > + if at end) */ > + buf++; > + buf %= length; > + goal = 0; > + } > + > + return BFITNOENT; > +} > + > +/** > * try_rgrp_fit - See if a given reservation will fit in a given RG > * @rgd: the RG data > * @ip: the inode > * > * If there's room for the requested blocks to be allocated from the RG: > + * This will try to get a multi-block reservation first, and if that doesn't > + * fit, it will take what it can. 
> * > * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) > */ > > -static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct > gfs2_inode *ip) > +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) > { > - const struct gfs2_blkreserv *rs = ip->i_res; > + struct gfs2_blkreserv *rs = ip->i_res; > > if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) > return 0; > - if (rgd->rd_free_clone >= rs->rs_requested) > + /* Look for a multi-block reservation. */ > + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && > + rg_mblk_search(rgd, ip) != BFITNOENT) > + return 1; > + if (unclaimed_blocks(rgd) >= rs->rs_requested) > return 1; > - return 0; > -} > > -static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) > -{ > - return (bi->bi_start * GFS2_NBBY) + blk; > + return 0; > } > > /** > @@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 > *last_unlinked, u64 skip > int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) > { > struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); > - struct gfs2_rgrpd *rgd, *begin = NULL; > + struct gfs2_rgrpd *begin = NULL; > struct gfs2_blkreserv *rs = ip->i_res; > int error = 0, rg_locked, flags = LM_FLAG_TRY; > u64 last_unlinked = NO_BLOCK; > @@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 > requested) > > if (sdp->sd_args.ar_rgrplvb) > flags |= GL_SKIP; > - rs = ip->i_res; > rs->rs_requested = requested; > if (gfs2_assert_warn(sdp, requested)) { > error = -EINVAL; > goto out; > } > - > - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) > - rgd = begin = ip->i_rgd; > - else > - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); > - > - if (rgd == NULL) > + if (gfs2_rs_active(rs)) { > + begin = rs->rs_rgd; > + flags = 0; /* Yoda: Do or do not. There is no try */ > + } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { > + rs->rs_rgd = begin = ip->i_rgd; > + } else { > + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); > + } > + if (rs->rs_rgd == NULL) > return -EBADSLT; > > while (loops < 3) { > rg_locked = 0; > > - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { > + if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { > rg_locked = 1; > error = 0; > + } else if (!loops && !gfs2_rs_active(rs) && > + rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { > + /* If the rgrp already is maxed out for contenders, > + we can eliminate it as a "first pass" without even > + requesting the rgrp glock. */ > + error = GLR_TRYFAILED; > } else { > - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, > - flags, &rs->rs_rgd_gh); > + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, > + LM_ST_EXCLUSIVE, flags, > + &rs->rs_rgd_gh); > if (!error && sdp->sd_args.ar_rgrplvb) { > - error = update_rgrp_lvb(rgd); > + error = update_rgrp_lvb(rs->rs_rgd); > if (error) { > gfs2_glock_dq_uninit(&rs->rs_rgd_gh); > return error; > @@ -1259,24 +1604,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 > requested) > } > switch (error) { > case 0: > - if (try_rgrp_fit(rgd, ip)) { > + if (gfs2_rs_active(rs)) { > + if (unclaimed_blocks(rs->rs_rgd) + > + rs->rs_free >= rs->rs_requested) { > + ip->i_rgd = rs->rs_rgd; > + return 0; > + } > + /* We have a multi-block reservation, but the > + rgrp doesn't have enough free blocks to > + satisfy the request. Free the reservation > + and look for a suitable rgrp. 
*/ > + gfs2_rs_deltree(rs); > + } > + if (try_rgrp_fit(rs->rs_rgd, ip)) { > if (sdp->sd_args.ar_rgrplvb) > - gfs2_rgrp_bh_get(rgd); > - ip->i_rgd = rgd; > + gfs2_rgrp_bh_get(rs->rs_rgd); > + ip->i_rgd = rs->rs_rgd; > return 0; > } > - if (rgd->rd_flags & GFS2_RDF_CHECK) { > + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { > if (sdp->sd_args.ar_rgrplvb) > - gfs2_rgrp_bh_get(rgd); > - try_rgrp_unlink(rgd, &last_unlinked, > + gfs2_rgrp_bh_get(rs->rs_rgd); > + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, > ip->i_no_addr); > } > if (!rg_locked) > gfs2_glock_dq_uninit(&rs->rs_rgd_gh); > /* fall through */ > case GLR_TRYFAILED: > - rgd = gfs2_rgrpd_get_next(rgd); > - if (rgd != begin) /* If we didn't wrap */ > + rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); > + if (rs->rs_rgd != begin) /* If we didn't wrap */ > break; > > flags &= ~LM_FLAG_TRY; > @@ -1314,6 +1671,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip) > { > struct gfs2_blkreserv *rs = ip->i_res; > > + if (!rs) > + return; > + > + if (!rs->rs_free) > + gfs2_rs_deltree(rs); > + > if (rs->rs_rgd_gh.gh_gl) > gfs2_glock_dq_uninit(&rs->rs_rgd_gh); > rs->rs_requested = 0; > @@ -1412,7 +1775,27 @@ do_search: > if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) > buffer = bi->bi_clone + bi->bi_offset; > > - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); > + while (1) { > + struct gfs2_blkreserv *rs; > + u32 rgblk; > + > + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); > + if (biblk == BFITNOENT) > + break; > + /* Check if this block is reserved() */ > + rgblk = gfs2_bi2rgd_blk(bi, biblk); > + rs = rs_find(rgd, rgblk); > + if (rs == NULL) > + break; > + > + BUG_ON(rs->rs_bi != bi); > + biblk = BFITNOENT; > + /* This should jump to the first block after the > + reservation. */ > + goal = rs->rs_biblk + rs->rs_free; > + if (goal >= bi->bi_len * GFS2_NBBY) > + break; > + } > if (biblk != BFITNOENT) > break; > > @@ -1448,8 +1831,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, > struct gfs2_bitmap *bi, > u32 blk, bool dinode, unsigned int *n) > { > const unsigned int elen = *n; > - u32 goal; > + u32 goal, rgblk; > const u8 *buffer = NULL; > + struct gfs2_blkreserv *rs; > > *n = 0; > buffer = bi->bi_bh->b_data + bi->bi_offset; > @@ -1462,6 +1846,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, > struct gfs2_bitmap *bi, > goal++; > if (goal >= (bi->bi_len * GFS2_NBBY)) > break; > + rgblk = gfs2_bi2rgd_blk(bi, goal); > + rs = rs_find(rgd, rgblk); > + if (rs) /* Oops, we bumped into someone's reservation */ > + break; > if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != > GFS2_BLKST_FREE) > break; > @@ -1537,12 +1925,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd > *sdp, u64 bstart, > > int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) > { > - const struct gfs2_rgrpd *rgd = gl->gl_object; > + struct gfs2_rgrpd *rgd = gl->gl_object; > + struct gfs2_blkreserv *trs; > + const struct rb_node *n; > + > if (rgd == NULL) > return 0; > - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", > + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", > (unsigned long long)rgd->rd_addr, rgd->rd_flags, > - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); > + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, > + rgd->rd_reserved); > + spin_lock(&rgd->rd_rsspin); > + for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { > + trs = rb_entry(n, struct gfs2_blkreserv, rs_node); > + dump_rs(seq, trs); > + } > + spin_unlock(&rgd->rd_rsspin); > return 0; > } > > @@ -1557,10 +1955,63 
@@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) > } > > /** > + * claim_reserved_blks - Claim previously reserved blocks > + * @ip: the inode that's claiming the reservation > + * @dinode: 1 if this block is a dinode block, otherwise data block > + * @nblocks: desired extent length > + * > + * Lay claim to previously allocated block reservation blocks. > + * Returns: Starting block number of the blocks claimed. > + * Sets *nblocks to the actual extent length allocated. > + */ > +static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, > + unsigned int *nblocks) > +{ > + struct gfs2_blkreserv *rs = ip->i_res; > + struct gfs2_rgrpd *rgd = rs->rs_rgd; > + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); > + struct gfs2_bitmap *bi; > + u64 start_block = gfs2_rs_startblk(rs); > + const unsigned int elen = *nblocks; > + > + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ > + gfs2_assert_withdraw(sdp, rgd); > + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ > + bi = rs->rs_bi; > + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); > + > + for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { > + /* Make sure the bitmap hasn't changed */ > + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, > + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); > + rs->rs_biblk++; > + rs->rs_free--; > + > + BUG_ON(!rgd->rd_reserved); > + rgd->rd_reserved--; > + dinode = false; > + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); > + } > + > + if (!rs->rs_free) { > + struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; > + > + gfs2_rs_deltree(rs); > + /* -nblocks because we haven't returned to do the math yet. > + I'm doing the math backwards to prevent negative numbers, > + but think of it as: > + if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ > + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) > + rg_mblk_search(rgd, ip); > + } > + return start_block; > +} > + > +/** > * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode > * @ip: the inode to allocate the block for > * @bn: Used to return the starting block number > - * @ndata: requested number of blocks/extent length (value/result) > + * @nblocks: requested number of blocks/extent length (value/result) > * @dinode: 1 if we're allocating a dinode block, else 0 > * @generation: the generation number of the inode > * > @@ -1585,20 +2036,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, > unsigned int *nblocks, > if (ip->i_res->rs_requested == 0) > return -ECANCELED; > > - rgd = ip->i_rgd; > - > - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) > - goal = ip->i_goal - rgd->rd_data0; > - else > - goal = rgd->rd_last_alloc; > - > - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); > + /* Check if we have a multi-block reservation, and if so, claim the > + next free block from it. 
*/ > + if (gfs2_rs_active(ip->i_res)) { > + BUG_ON(!ip->i_res->rs_free); > + rgd = ip->i_res->rs_rgd; > + block = claim_reserved_blks(ip, dinode, nblocks); > + } else { > + rgd = ip->i_rgd; > > - /* Since all blocks are reserved in advance, this shouldn't happen */ > - if (blk == BFITNOENT) > - goto rgrp_error; > + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) > + goal = ip->i_goal - rgd->rd_data0; > + else > + goal = rgd->rd_last_alloc; > + > + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); > + > + /* Since all blocks are reserved in advance, this shouldn't > + happen */ > + if (blk == BFITNOENT) { > + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", > + *nblocks); > + printk(KERN_WARNING "FULL=%d\n", > + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); > + goto rgrp_error; > + } > > - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); > + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); > + } > ndata = *nblocks; > if (dinode) > ndata--; > @@ -1615,8 +2080,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, > unsigned int *nblocks, > brelse(dibh); > } > } > - if (rgd->rd_free < *nblocks) > + if (rgd->rd_free < *nblocks) { > + printk(KERN_WARNING "nblocks=%u\n", *nblocks); > goto rgrp_error; > + } > > rgd->rd_free -= *nblocks; > if (dinode) { > @@ -1876,6 +2343,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) > for (x = 0; x < rlist->rl_rgrps; x++) > gfs2_holder_uninit(&rlist->rl_ghs[x]); > kfree(rlist->rl_ghs); > + rlist->rl_ghs = NULL; > } > } > > diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h > index 5d8314d..ca6e267 100644 > --- a/fs/gfs2/rgrp.h > +++ b/fs/gfs2/rgrp.h > @@ -13,6 +13,14 @@ > #include <linux/slab.h> > #include <linux/uaccess.h> > > +/* Since each block in the file system is represented by two bits in the > + * bitmap, one 64-bit word in the bitmap will represent 32 blocks. > + * By reserving 32 blocks at a time, we can optimize / shortcut how we search > + * through the bitmaps by looking a word at a time. 
> + */ > +#define RGRP_RSRV_MINBYTES 8 > +#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) > + > struct gfs2_rgrpd; > struct gfs2_sbd; > struct gfs2_holder; > @@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); > extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); > extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); > > +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); > + > extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); > extern void gfs2_inplace_release(struct gfs2_inode *ip); > > @@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 > *bn, unsigned int *n, > bool dinode, u64 *generation); > > extern int gfs2_rs_alloc(struct gfs2_inode *ip); > +extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); > extern void gfs2_rs_delete(struct gfs2_inode *ip); > extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, > int meta); > extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); > @@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, > u64 offset, > const struct gfs2_bitmap *bi, unsigned > minlen, u64 *ptrimmed); > extern int gfs2_fitrim(struct file *filp, void __user *argp); > > -/* This is how to tell if a reservation is "inplace" reserved: */ > +/* This is how to tell if a multi-block reservation is "inplace" reserved: */ > static inline int gfs2_mb_reserved(struct gfs2_inode *ip) > { > if (ip->i_res && ip->i_res->rs_requested) > @@ -70,4 +81,22 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip) > return 0; > } > > +/* This is how to tell if a multi-block reservation is in the rgrp tree: */ > +static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) > +{ > + if (rs && rs->rs_bi) > + return 1; > + return 0; > +} > + > +static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) > +{ > + return (bi->bi_start * GFS2_NBBY) + blk; > +} > + > +static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) > +{ > + return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; > +} > + > #endif /* __RGRP_DOT_H__ */ > diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c > index 7880687..b1502c4 100644 > --- a/fs/gfs2/super.c > +++ b/fs/gfs2/super.c > @@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) > return -EIO; > } > > + error = gfs2_rindex_update(sdp); > + if (error) > + return error; > + > error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); > if (error) > return error; > @@ -1550,6 +1554,9 @@ out_truncate: > > out_unlock: > /* Error path for case 1 */ > + if (gfs2_rs_active(ip->i_res)) > + gfs2_rs_deltree(ip->i_res); > + > if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) > gfs2_glock_dq(&ip->i_iopen_gh); > gfs2_holder_uninit(&ip->i_iopen_gh); > diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h > index 1b8b815..a25c252 100644 > --- a/fs/gfs2/trace_gfs2.h > +++ b/fs/gfs2/trace_gfs2.h > @@ -14,6 +14,7 @@ > #include <linux/ktime.h> > #include "incore.h" > #include "glock.h" > +#include "rgrp.h" > > #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } > #define glock_trace_name(x) __print_symbolic(x, \ > @@ -31,6 +32,17 @@ > { GFS2_BLKST_DINODE, "dinode" }, \ > { GFS2_BLKST_UNLINKED, "unlinked" }) > > +#define TRACE_RS_DELETE 0 > +#define TRACE_RS_TREEDEL 1 > +#define TRACE_RS_INSERT 2 > +#define TRACE_RS_CLAIM 3 > + > +#define rs_func_name(x) __print_symbolic(x, \ > + { 0, "del " }, \ > + { 1, "tdel" }, \ > + { 2, "ins " }, \ > + { 3, "clm " }) > + > 
#define show_glock_flags(flags) __print_flags(flags, "", \ > {(1UL << GLF_LOCK), "l" }, \ > {(1UL << GLF_DEMOTE), "D" }, \ > @@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc, > __field( u8, block_state ) > __field( u64, rd_addr ) > __field( u32, rd_free_clone ) > + __field( u32, rd_reserved ) > ), > > TP_fast_assign( > @@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc, > __entry->block_state = block_state; > __entry->rd_addr = rgd->rd_addr; > __entry->rd_free_clone = rgd->rd_free_clone; > + __entry->rd_reserved = rgd->rd_reserved; > ), > > - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", > + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", > MAJOR(__entry->dev), MINOR(__entry->dev), > (unsigned long long)__entry->inum, > (unsigned long long)__entry->start, > (unsigned long)__entry->len, > block_state_name(__entry->block_state), > (unsigned long long)__entry->rd_addr, > - __entry->rd_free_clone) > + __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) > +); > + > +/* Keep track of multi-block reservations as they are allocated/freed */ > +TRACE_EVENT(gfs2_rs, > + > + TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, > + u8 func), > + > + TP_ARGS(ip, rs, func), > + > + TP_STRUCT__entry( > + __field( dev_t, dev ) > + __field( u64, rd_addr ) > + __field( u32, rd_free_clone ) > + __field( u32, rd_reserved ) > + __field( u64, inum ) > + __field( u64, start ) > + __field( u32, free ) > + __field( u8, func ) > + ), > + > + TP_fast_assign( > + __entry->dev = rs->rs_rgd ? > rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; > + __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; > + __entry->rd_free_clone = rs->rs_rgd ? > rs->rs_rgd->rd_free_clone : 0; > + __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved > : 0; > + __entry->inum = ip ? ip->i_no_addr : 0; > + __entry->start = gfs2_rs_startblk(rs); > + __entry->free = rs->rs_free; > + __entry->func = func; > + ), > + > + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " > + "f:%lu", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + (unsigned long long)__entry->inum, > + (unsigned long long)__entry->start, > + (unsigned long long)__entry->rd_addr, > + (unsigned long)__entry->rd_free_clone, > + (unsigned long)__entry->rd_reserved, > + rs_func_name(__entry->func), (unsigned long)__entry->free) > ); > > #endif /* _TRACE_GFS2_H */ > diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c > index 523c0de..27a0b4a 100644 > --- a/fs/gfs2/xattr.c > +++ b/fs/gfs2/xattr.c > @@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, > struct buffer_head *bh, > { > int error; > > + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); > + if (error) > + return error; > + > error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); > if (error) > goto out_alloc; > @@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, > struct gfs2_ea_request *er, > struct buffer_head *dibh; > int error; > > + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); > + if (error) > + return error; > + > error = gfs2_quota_lock_check(ip); > if (error) > return error; > @@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) > { > int error; > > + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); > + if (error) > + return error; > + > error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); > if (error) > return error;
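
(An aside for anyone following the reservation tree logic in rgrp.c above: the heart of rs_find() is the overlap test rs_cmp(), which treats each reservation as a block range and steers the rb-tree descent left or right until a range containing the target block is found. Below is a rough user-space sketch of that same compare-and-descend pattern. It uses a plain binary tree and made-up names (resv, resv_cmp, resv_find) rather than the kernel's rb_node API, so it is only an illustration of the idea, not code from the patch.)

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for struct gfs2_blkreserv: one reserved block range. */
struct resv {
	uint64_t start;			/* first reserved block (absolute) */
	uint32_t free;			/* number of blocks still reserved */
	struct resv *left, *right;	/* plain BST links, not rb_node */
};

/* Same idea as rs_cmp(): 1 if blk..blk+len-1 lies past the reservation,
   -1 if it lies entirely before it, 0 if the two ranges overlap. */
static int resv_cmp(uint64_t blk, uint32_t len, const struct resv *rs)
{
	if (blk >= rs->start + rs->free)
		return 1;
	if (blk + len - 1 < rs->start)
		return -1;
	return 0;
}

/* Same idea as rs_find(): walk down the tree until an overlap is hit. */
static struct resv *resv_find(struct resv *root, uint64_t blk)
{
	while (root) {
		int rc = resv_cmp(blk, 1, root);
		if (rc < 0)
			root = root->left;
		else if (rc > 0)
			root = root->right;
		else
			return root;
	}
	return NULL;
}

int main(void)
{
	struct resv a = { .start = 100, .free = 32 };
	struct resv b = { .start = 200, .free = 32, .left = &a };

	printf("block 110 %s reserved\n", resv_find(&b, 110) ? "is" : "is not");
	printf("block 150 %s reserved\n", resv_find(&b, 150) ? "is" : "is not");
	return 0;
}

Block 110 lands inside the 100..131 reservation and comes back non-NULL; block 150 does not. That is the same yes/no answer rgblk_search() and gfs2_alloc_extent() need from rs_find() before they hand out bits from the bitmap.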