Hi,

Now in the -nmw git tree. Thanks,

Steve.

On Fri, 2010-08-20 at 00:21 -0500, Benjamin Marzinski wrote:
> This patch adds support for fallocate to gfs2.  Since gfs2 does not support
> uninitialized data blocks, it must write out zeros to all the blocks.  However,
> since it does not need to lock any pages to read from, gfs2 can write out the
> zero blocks much more efficiently.  On a moderately full filesystem, fallocate
> works around 5 times faster on average.  The fallocate call also allows gfs2 to
> add blocks to the file without changing the filesize, which will make it
> possible for gfs2 to preallocate space for the rindex file, so that gfs2 can
> grow a completely full filesystem.
> 
> Signed-off-by: Benjamin Marzinski <[email protected]>
> ---
>  fs/gfs2/aops.c      |    4 
>  fs/gfs2/incore.h    |    1 
>  fs/gfs2/inode.h     |    2 
>  fs/gfs2/ops_inode.c |  254 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/gfs2/rgrp.c      |   12 ++
>  fs/gfs2/trans.h     |    1 
>  6 files changed, 272 insertions(+), 2 deletions(-)
> 
> Index: gfs2-2.6-nmw/fs/gfs2/aops.c
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/aops.c
> +++ gfs2-2.6-nmw/fs/gfs2/aops.c
> @@ -36,8 +36,8 @@
>  #include "glops.h"
>  
> 
> -static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
> -                                unsigned int from, unsigned int to)
> +void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
> +                         unsigned int from, unsigned int to)
>  {
>       struct buffer_head *head = page_buffers(page);
>       unsigned int bsize = head->b_size;
> Index: gfs2-2.6-nmw/fs/gfs2/inode.h
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/inode.h
> +++ gfs2-2.6-nmw/fs/gfs2/inode.h
> @@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page 
>  extern int gfs2_internal_read(struct gfs2_inode *ip,
>                             struct file_ra_state *ra_state,
>                             char *buf, loff_t *pos, unsigned size);
> +extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
> +                                unsigned int from, unsigned int to);
>  extern void gfs2_set_aops(struct inode *inode);
>  
>  static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
> Index: gfs2-2.6-nmw/fs/gfs2/ops_inode.c
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/ops_inode.c
> +++ gfs2-2.6-nmw/fs/gfs2/ops_inode.c
> @@ -18,6 +18,8 @@
>  #include <linux/gfs2_ondisk.h>
>  #include <linux/crc32.h>
>  #include <linux/fiemap.h>
> +#include <linux/swap.h>
> +#include <linux/falloc.h>
>  #include <asm/uaccess.h>
>  
>  #include "gfs2.h"
> @@ -1277,6 +1279,257 @@ static int gfs2_removexattr(struct dentr
>       return ret;
>  }
>  
> +static void empty_write_end(struct page *page, unsigned from,
> +                        unsigned to)
> +{
> +     struct gfs2_inode *ip = GFS2_I(page->mapping->host);
> +
> +     page_zero_new_buffers(page, from, to);
> +     flush_dcache_page(page);
> +     mark_page_accessed(page);
> +
> +     if (!gfs2_is_writeback(ip))
> +             gfs2_page_add_databufs(ip, page, from, to);
> +
> +     block_commit_write(page, from, to);
> +}
> +
> +
> +static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
> +{
> +     unsigned start, end, next;
> +     struct buffer_head *bh, *head;
> +     int error;
> +
> +     if (!page_has_buffers(page)) {
> +             error = block_prepare_write(page, from, to, gfs2_block_map);
> +             if (unlikely(error))
> +                     return error;
> +
> +             empty_write_end(page, from, to);
> +             return 0;
> +     }
> +
> +     bh = head = page_buffers(page);
> +     next = end = 0;
> +     while (next < from) {
> +             next += bh->b_size;
> +             bh = bh->b_this_page;
> +     }
> +     start = next;
> +     do {
> +             next += bh->b_size;
> +             if (buffer_mapped(bh)) {
> +                     if (end) {
> +                             error = block_prepare_write(page, start, end,
> +                                                         gfs2_block_map);
> +                             if (unlikely(error))
> +                                     return error;
> +                             empty_write_end(page, start, end);
> +                             end = 0;
> +                     }
> +                     start = next;
> +             }
> +             else
> +                     end = next;
> +             bh = bh->b_this_page;
> +     } while (next < to);
> +
> +     if (end) {
> +             error = block_prepare_write(page, start, end, gfs2_block_map);
> +             if (unlikely(error))
> +                     return error;
> +             empty_write_end(page, start, end);
> +     }
> +
> +     return 0;
> +}
> +
> +static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
> +                        int mode)
> +{
> +     struct gfs2_inode *ip = GFS2_I(inode);
> +     struct buffer_head *dibh;
> +     int error;
> +     u64 start = offset >> PAGE_CACHE_SHIFT;
> +     unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
> +     u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
> +     pgoff_t curr;
> +     struct page *page;
> +     unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
> +     unsigned int from, to;
> +
> +     if (!end_offset)
> +             end_offset = PAGE_CACHE_SIZE;
> +
> +     error = gfs2_meta_inode_buffer(ip, &dibh);
> +     if (unlikely(error))
> +             goto out;
> +
> +     gfs2_trans_add_bh(ip->i_gl, dibh, 1);
> +
> +     if (gfs2_is_stuffed(ip)) {
> +             error = gfs2_unstuff_dinode(ip, NULL);
> +             if (unlikely(error))
> +                     goto out;
> +     }
> +
> +     curr = start;
> +     offset = start << PAGE_CACHE_SHIFT;
> +     from = start_offset;
> +     to = PAGE_CACHE_SIZE;
> +     while (curr <= end) {
> +             page = grab_cache_page_write_begin(inode->i_mapping, curr,
> +                                                AOP_FLAG_NOFS);
> +             if (unlikely(!page)) {
> +                     error = -ENOMEM;
> +                     goto out;
> +             }
> +
> +             if (curr == end)
> +                     to = end_offset;
> +             error = write_empty_blocks(page, from, to);
> +             if (!error && offset + to > inode->i_size &&
> +                 !(mode & FALLOC_FL_KEEP_SIZE)) {
> +                     i_size_write(inode, offset + to);
> +             }
> +             unlock_page(page);
> +             page_cache_release(page);
> +             if (error)
> +                     goto out;
> +             curr++;
> +             offset += PAGE_CACHE_SIZE;
> +             from = 0;
> +     }
> +
> +     gfs2_dinode_out(ip, dibh->b_data);
> +     mark_inode_dirty(inode);
> +
> +     brelse(dibh);
> +
> +out:
> +     return error;
> +}
> +
> +static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
> +                         unsigned int *data_blocks, unsigned int *ind_blocks)
> +{
> +     const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> +     unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
> +     unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
> +
> +     for (tmp = max_data; tmp > sdp->sd_diptrs;) {
> +             tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
> +             max_data -= tmp;
> +     }
> +     /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
> +        so it might end up with fewer data blocks */
> +     if (max_data <= *data_blocks)
> +             return;
> +     *data_blocks = max_data;
> +     *ind_blocks = max_blocks - max_data;
> +     *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
> +     if (*len > max) {
> +             *len = max;
> +             gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
> +     }
> +}
> +
> +static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
> +                        loff_t len)
> +{
> +     struct gfs2_sbd *sdp = GFS2_SB(inode);
> +     struct gfs2_inode *ip = GFS2_I(inode);
> +     unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
> +     loff_t bytes, max_bytes;
> +     struct gfs2_alloc *al;
> +     int error;
> +     loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
> +     next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
> +
> +     offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
> +              sdp->sd_sb.sb_bsize_shift;
> +
> +     len = next - offset;
> +     bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
> +     if (!bytes)
> +             bytes = UINT_MAX;
> +
> +     gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
> +     error = gfs2_glock_nq(&ip->i_gh);
> +     if (unlikely(error))
> +             goto out_uninit;
> +
> +     if (!gfs2_write_alloc_required(ip, offset, len))
> +             goto out_unlock;
> +
> +     while (len > 0) {
> +             if (len < bytes)
> +                     bytes = len;
> +             al = gfs2_alloc_get(ip);
> +             if (!al) {
> +                     error = -ENOMEM;
> +                     goto out_unlock;
> +             }
> +
> +             error = gfs2_quota_lock_check(ip);
> +             if (error)
> +                     goto out_alloc_put;
> +
> +retry:
> +             gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
> +
> +             al->al_requested = data_blocks + ind_blocks;
> +             error = gfs2_inplace_reserve(ip);
> +             if (error) {
> +                     if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
> +                             bytes >>= 1;
> +                             goto retry;
> +                     }
> +                     goto out_qunlock;
> +             }
> +             max_bytes = bytes;
> +             calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
> +             al->al_requested = data_blocks + ind_blocks;
> +
> +             rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
> +                       RES_RG_HDR + ip->i_alloc->al_rgd->rd_length;
> +             if (gfs2_is_jdata(ip))
> +                     rblocks += data_blocks ? data_blocks : 1;
> +
> +             error = gfs2_trans_begin(sdp, rblocks,
> +                                      PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
> +             if (error)
> +                     goto out_trans_fail;
> +
> +             error = fallocate_chunk(inode, offset, max_bytes, mode);
> +             gfs2_trans_end(sdp);
> +
> +             if (error)
> +                     goto out_trans_fail;
> +
> +             len -= max_bytes;
> +             offset += max_bytes;
> +             gfs2_inplace_release(ip);
> +             gfs2_quota_unlock(ip);
> +             gfs2_alloc_put(ip);
> +     }
> +     goto out_unlock;
> +
> +out_trans_fail:
> +     gfs2_inplace_release(ip);
> +out_qunlock:
> +     gfs2_quota_unlock(ip);
> +out_alloc_put:
> +     gfs2_alloc_put(ip);
> +out_unlock:
> +     gfs2_glock_dq(&ip->i_gh);
> +out_uninit:
> +     gfs2_holder_uninit(&ip->i_gh);
> +     return error;
> +}
> +
> +
>  static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>                      u64 start, u64 len)
>  {
> @@ -1327,6 +1580,7 @@ const struct inode_operations gfs2_file_
>       .getxattr = gfs2_getxattr,
>       .listxattr = gfs2_listxattr,
>       .removexattr = gfs2_removexattr,
> +     .fallocate = gfs2_fallocate,
>       .fiemap = gfs2_fiemap,
>  };
>  
> Index: gfs2-2.6-nmw/fs/gfs2/incore.h
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/incore.h
> +++ gfs2-2.6-nmw/fs/gfs2/incore.h
> @@ -571,6 +571,7 @@ struct gfs2_sbd {
>       struct list_head sd_rindex_mru_list;
>       struct gfs2_rgrpd *sd_rindex_forward;
>       unsigned int sd_rgrps;
> +     unsigned int sd_max_rg_data;
>  
>       /* Journal index stuff */
>  
> Index: gfs2-2.6-nmw/fs/gfs2/rgrp.c
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/rgrp.c
> +++ gfs2-2.6-nmw/fs/gfs2/rgrp.c
> @@ -589,6 +589,8 @@ static int gfs2_ri_update(struct gfs2_in
>       struct inode *inode = &ip->i_inode;
>       struct file_ra_state ra_state;
>       u64 rgrp_count = i_size_read(inode);
> +     struct gfs2_rgrpd *rgd;
> +     unsigned int max_data = 0;
>       int error;
>  
>       do_div(rgrp_count, sizeof(struct gfs2_rindex));
> @@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_in
>               }
>       }
>  
> +     list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
> +             if (rgd->rd_data > max_data)
> +                     max_data = rgd->rd_data;
> +     sdp->sd_max_rg_data = max_data;
>       sdp->sd_rindex_uptodate = 1;
>       return 0;
>  }
> @@ -622,6 +628,8 @@ static int gfs2_ri_update_special(struct
>       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
>       struct inode *inode = &ip->i_inode;
>       struct file_ra_state ra_state;
> +     struct gfs2_rgrpd *rgd;
> +     unsigned int max_data = 0;
>       int error;
>  
>       file_ra_state_init(&ra_state, inode->i_mapping);
> @@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct
>                       return error;
>               }
>       }
> +     list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
> +             if (rgd->rd_data > max_data)
> +                     max_data = rgd->rd_data;
> +     sdp->sd_max_rg_data = max_data;
>  
>       sdp->sd_rindex_uptodate = 1;
>       return 0;
> Index: gfs2-2.6-nmw/fs/gfs2/trans.h
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/trans.h
> +++ gfs2-2.6-nmw/fs/gfs2/trans.h
> @@ -20,6 +20,7 @@ struct gfs2_glock;
>  #define RES_JDATA    1
>  #define RES_DATA     1
>  #define RES_LEAF     1
> +#define RES_RG_HDR   1
>  #define RES_RG_BIT   2
>  #define RES_EATTR    1
>  #define RES_STATFS   1
> 


Reply via email to