From: Bob Peterson <[email protected]>

This patch implements iomap-based block mapping for gfs2 (gfs2_get_iomap)
and switches gfs2_block_map to use it under the covers.

The additional IOMAP_F_BOUNDARY iomap flag indicates when iomap has
reached a "metadata boundary" and fetching the next mapping is likely to
incur an additional I/O.  This flag is used for setting the bh buffer
boundary flag.

Signed-off-by: Bob Peterson <[email protected]>
Signed-off-by: Andreas Gruenbacher <[email protected]>
---
 fs/gfs2/bmap.c        | 249 ++++++++++++++++++++++++++++++++++++--------------
 fs/gfs2/bmap.h        |   4 +
 fs/gfs2/trace_gfs2.h  |  65 +++++++++++++
 include/linux/iomap.h |   3 +-
 4 files changed, 250 insertions(+), 71 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a431afd..fa33fdc 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,6 +13,7 @@
 #include <linux/blkdev.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/iomap.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -416,7 +417,6 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
        const __be64 *first = ptr;
        u64 d = be64_to_cpu(*ptr);
 
-       *eob = 0;
        do {
                ptr++;
                if (ptr >= end)
@@ -504,10 +504,8 @@ static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
  * Returns: errno on error
  */
 
-static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
-                          bool zero_new, struct metapath *mp,
-                          const size_t maxlen, sector_t *dblock,
-                          unsigned *dblks)
+static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
+                           unsigned flags, struct metapath *mp)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -515,36 +513,37 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
        struct buffer_head *dibh = mp->mp_bh[0];
        u64 bn;
        unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
+       unsigned dblks = 0;
        unsigned ptrs_per_blk;
        const unsigned end_of_metadata = mp->mp_fheight - 1;
        int ret;
-       int eob = 0;
        enum alloc_state state;
        __be64 *ptr;
        __be64 zero_bn = 0;
+       size_t maxlen = iomap->length >> inode->i_blkbits;
 
        BUG_ON(mp->mp_aheight < 1);
        BUG_ON(dibh == NULL);
 
-       *dblock = 0;
-       *dblks = 0;
        gfs2_trans_add_meta(ip->i_gl, dibh);
 
        if (mp->mp_fheight == mp->mp_aheight) {
                struct buffer_head *bh;
+               int eob;
+
                /* Bottom indirect block exists, find unalloced extent size */
                ptr = metapointer(end_of_metadata, mp);
                bh = mp->mp_bh[end_of_metadata];
-               *dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
-                                           maxlen, &eob);
-               BUG_ON(*dblks < 1);
+               dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
+                                          maxlen, &eob);
+               BUG_ON(dblks < 1);
                state = ALLOC_DATA;
        } else {
                /* Need to allocate indirect blocks */
                ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
                        sdp->sd_diptrs;
-               *dblks = min(maxlen, (size_t)(ptrs_per_blk -
-                                             mp->mp_list[end_of_metadata]));
+               dblks = min(maxlen, (size_t)(ptrs_per_blk -
+                                            mp->mp_list[end_of_metadata]));
                if (mp->mp_fheight == ip->i_height) {
                        /* Writing into existing tree, extend tree down */
                        iblks = mp->mp_fheight - mp->mp_aheight;
@@ -560,7 +559,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 
        /* start of the second part of the function (state machine) */
 
-       blks = *dblks + iblks;
+       blks = dblks + iblks;
        i = mp->mp_aheight;
        do {
                int error;
@@ -617,26 +616,28 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
                                break;
                /* Tree complete, adding data blocks */
                case ALLOC_DATA:
-                       BUG_ON(n > *dblks);
+                       BUG_ON(n > dblks);
                        BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
                        gfs2_trans_add_meta(ip->i_gl, 
mp->mp_bh[end_of_metadata]);
-                       *dblks = n;
+                       dblks = n;
                        ptr = metapointer(end_of_metadata, mp);
-                       *dblock = bn;
+                       iomap->blkno = bn;
                        while (n-- > 0)
                                *ptr++ = cpu_to_be64(bn++);
-                       if (zero_new) {
-                               ret = sb_issue_zeroout(sb, *dblock, *dblks,
-                                                      GFP_NOFS);
+                       if (flags & IOMAP_ZERO) {
+                               ret = sb_issue_zeroout(sb, iomap->blkno,
+                                                      dblks, GFP_NOFS);
                                if (ret) {
                                        fs_err(sdp,
                                               "Failed to zero data buffers\n");
+                                       flags &= ~IOMAP_ZERO;
                                }
                        }
                        break;
                }
-       } while ((state != ALLOC_DATA) || !(*dblock));
+       } while (iomap->blkno == IOMAP_NULL_BLOCK);
 
+       iomap->length = (u64)dblks << inode->i_blkbits;
        ip->i_height = mp->mp_fheight;
        gfs2_add_inode_blocks(&ip->i_inode, alloced);
        gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
@@ -644,47 +645,101 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 }
 
 /**
- * gfs2_block_map - Map a block from an inode to a disk block
- * @inode: The inode
- * @lblock: The logical block number
- * @bh_map: The bh to be mapped
- * @create: True if its ok to alloc blocks to satify the request
+ * hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @mp: The metapath
  *
- * Sets buffer_mapped() if successful, sets buffer_boundary() if a
- * read of metadata will be required before the next block can be
- * mapped. Sets buffer_new() if new blocks were allocated.
+ * Returns: The hole size in bytes
  *
- * Returns: errno
  */
+static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
+{
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       struct metapath mp_eof;
+       u64 factor = 1;
+       int hgt;
+       u64 holesz = 0;
+       const __be64 *first, *end, *ptr;
+       const struct buffer_head *bh;
+       u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
+       int zeroptrs;
+       bool done = false;
+
+       /* Get another metapath, to the very last byte */
+       find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
+       for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
+               bh = mp->mp_bh[hgt];
+               if (bh) {
+                       zeroptrs = 0;
+                       first = metapointer(hgt, mp);
+                       end = (const __be64 *)(bh->b_data + bh->b_size);
+
+                       for (ptr = first; ptr < end; ptr++) {
+                               if (*ptr) {
+                                       done = true;
+                                       break;
+                               } else {
+                                       zeroptrs++;
+                               }
+                       }
+               } else {
+                       zeroptrs = sdp->sd_inptrs;
+               }
+               if (factor * zeroptrs >= lblock_stop - lblock + 1) {
+                       holesz = lblock_stop - lblock + 1;
+                       break;
+               }
+               holesz += factor * zeroptrs;
 
-int gfs2_block_map(struct inode *inode, sector_t lblock,
-                  struct buffer_head *bh_map, int create)
+               factor *= sdp->sd_inptrs;
+               if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
+                       (mp->mp_list[hgt - 1])++;
+       }
+       return holesz << inode->i_blkbits;
+}
+
+/**
+ * gfs2_get_iomap - Map blocks from an inode to disk blocks
+ * @inode: The inode
+ * @pos: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @flags: iomap flags
+ * @iomap: The iomap structure
+ *
+ * Returns: errno
+ */
+int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
+                  unsigned flags, struct iomap *iomap)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
+       struct metapath mp = { .mp_aheight = 1, };
        unsigned int bsize = sdp->sd_sb.sb_bsize;
-       const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
        const u64 *arr = sdp->sd_heightsize;
        __be64 *ptr;
-       u64 size;
-       struct metapath mp;
+       sector_t lblock = pos >> inode->i_blkbits;
+       sector_t lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> 
inode->i_blkbits;
        int ret;
-       int eob;
+       int eob = 0;
        unsigned int len;
        struct buffer_head *bh;
        u8 height;
-       bool zero_new = false;
-       sector_t dblock = 0;
-       unsigned dblks;
 
-       BUG_ON(maxlen == 0);
+       trace_gfs2_iomap_start(ip, pos, length, flags);
+       if (!length) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       iomap->offset = lblock << inode->i_blkbits;
+       iomap->blkno = IOMAP_NULL_BLOCK;
+       iomap->type = IOMAP_HOLE;
+       iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
+       iomap->flags = 0;
+       bmap_lock(ip, 0);
 
-       memset(&mp, 0, sizeof(mp));
-       bmap_lock(ip, create);
-       clear_buffer_mapped(bh_map);
-       clear_buffer_new(bh_map);
-       clear_buffer_boundary(bh_map);
-       trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
        if (gfs2_is_dir(ip)) {
                bsize = sdp->sd_jbsize;
                arr = sdp->sd_jheightsize;
@@ -692,56 +747,110 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 
        ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
        if (ret)
-               goto out;
+               goto out_release;
 
        height = ip->i_height;
-       size = (lblock + 1) * bsize;
-       while (size > arr[height])
+       while ((lblock + 1) * bsize > arr[height])
                height++;
        find_metapath(sdp, lblock, &mp, height);
-       mp.mp_aheight = 1;
        if (height > ip->i_height || gfs2_is_stuffed(ip))
                goto do_alloc;
+
        ret = lookup_metapath(ip, &mp);
        if (ret < 0)
-               goto out;
+               goto out_release;
+
        if (mp.mp_aheight != ip->i_height)
                goto do_alloc;
+
        ptr = metapointer(ip->i_height - 1, &mp);
        if (*ptr == 0)
                goto do_alloc;
-       map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
+
+       iomap->type = IOMAP_MAPPED;
+       iomap->blkno = be64_to_cpu(*ptr);
+
        bh = mp.mp_bh[ip->i_height - 1];
-       len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
-       bh_map->b_size = (len << inode->i_blkbits);
+       len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, 
&eob);
        if (eob)
-               set_buffer_boundary(bh_map);
+               iomap->flags |= IOMAP_F_BOUNDARY;
+       iomap->length = (u64)len << inode->i_blkbits;
+
        ret = 0;
-out:
+
+out_release:
        release_metapath(&mp);
-       trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
-       bmap_unlock(ip, create);
+       bmap_unlock(ip, 0);
+out:
+       trace_gfs2_iomap_end(ip, iomap, ret);
        return ret;
 
 do_alloc:
-       /* All allocations are done here, firstly check create flag */
-       if (!create) {
+       if (!(flags & IOMAP_WRITE)) {
+               if (pos >= i_size_read(inode)) {
+                       ret = -ENOENT;
+                       goto out_release;
+               }
                BUG_ON(gfs2_is_stuffed(ip));
                ret = 0;
-               goto out;
+               iomap->length = hole_size(inode, lblock, &mp);
+               goto out_release;
        }
 
-       /* At this point ret is the tree depth of already allocated blocks */
+       ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+       goto out_release;
+}
+
+/**
+ * gfs2_block_map - Map a block from an inode to a disk block
+ * @inode: The inode
+ * @lblock: The logical block number
+ * @bh_map: The bh to be mapped
+ * @create: True if it's ok to alloc blocks to satisfy the request
+ *
+ * Sets buffer_mapped() if successful, sets buffer_boundary() if a
+ * read of metadata will be required before the next block can be
+ * mapped. Sets buffer_new() if new blocks were allocated.
+ *
+ * Returns: errno
+ */
+
+int gfs2_block_map(struct inode *inode, sector_t lblock,
+                  struct buffer_head *bh_map, int create)
+{
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       struct iomap iomap;
+       int ret, flags = 0;
+
+       clear_buffer_mapped(bh_map);
+       clear_buffer_new(bh_map);
+       clear_buffer_boundary(bh_map);
+       trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
+
+       if (create)
+               flags |= IOMAP_WRITE;
        if (buffer_zeronew(bh_map))
-               zero_new = true;
-       ret = gfs2_bmap_alloc(inode, lblock, zero_new, &mp, maxlen, &dblock,
-                             &dblks);
-       if (ret == 0) {
-               map_bh(bh_map, inode->i_sb, dblock);
-               bh_map->b_size = dblks << inode->i_blkbits;
+               flags |= IOMAP_ZERO;
+       ret = gfs2_get_iomap(inode, lblock << sdp->sd_sb.sb_bsize_shift,
+                            bh_map->b_size, flags, &iomap);
+       if (ret)
+               goto out;
+
+       iomap.length = round_up(iomap.length, sdp->sd_sb.sb_bsize);
+       bh_map->b_size = iomap.length;
+       if (iomap.flags & IOMAP_F_BOUNDARY)
+               set_buffer_boundary(bh_map);
+       if (iomap.blkno != IOMAP_NULL_BLOCK)
+               map_bh(bh_map, inode->i_sb, iomap.blkno);
+       bh_map->b_size = iomap.length;
+       clear_buffer_zeronew(bh_map);
+       if (iomap.flags & IOMAP_F_NEW)
                set_buffer_new(bh_map);
-       }
-       goto out;
+
+out:
+       trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
+       return ret;
 }
 
 /*
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 81ded5e..e904aed 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,6 +10,8 @@
 #ifndef __BMAP_DOT_H__
 #define __BMAP_DOT_H__
 
+#include <linux/iomap.h>
+
 #include "inode.h"
 
 struct inode;
@@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
                          struct buffer_head *bh, int create);
+extern int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
+                         unsigned flags, struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
                           u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 49ac55d..3c91ae3 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -12,6 +12,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/writeback.h>
 #include <linux/ktime.h>
+#include <linux/iomap.h>
 #include "incore.h"
 #include "glock.h"
 #include "rgrp.h"
@@ -469,6 +470,70 @@ TRACE_EVENT(gfs2_bmap,
                  __entry->errno)
 );
 
+TRACE_EVENT(gfs2_iomap_start,
+
+       TP_PROTO(const struct gfs2_inode *ip, loff_t pos, ssize_t length,
+                u16 flags),
+
+       TP_ARGS(ip, pos, length, flags),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        u64,    inum                    )
+               __field(        loff_t, pos                     )
+               __field(        ssize_t, length                 )
+               __field(        u16,    flags                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = 
ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
+               __entry->inum           = ip->i_no_addr;
+               __entry->pos            = pos;
+               __entry->length         = length;
+               __entry->flags          = flags;
+       ),
+
+       TP_printk("%u,%u bmap %llu iomap start %llu/%lu flags:%08x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->inum,
+                 (unsigned long long)__entry->pos,
+                 (unsigned long)__entry->length, (u16)__entry->flags)
+);
+
+TRACE_EVENT(gfs2_iomap_end,
+
+       TP_PROTO(const struct gfs2_inode *ip, struct iomap *iomap, int ret),
+
+       TP_ARGS(ip, iomap, ret),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        u64,    inum                    )
+               __field(        loff_t, offset                  )
+               __field(        ssize_t, length                 )
+               __field(        u16,    flags                   )
+               __field(        u16,    type                    )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = 
ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
+               __entry->inum           = ip->i_no_addr;
+               __entry->offset         = iomap->offset;
+               __entry->length         = iomap->length;
+               __entry->flags          = iomap->flags;
+               __entry->type           = iomap->type;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("%u,%u bmap %llu iomap end %llu/%lu ty:%d flags:%08x rc:%d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->inum,
+                 (unsigned long long)__entry->offset,
+                 (unsigned long)__entry->length, (u16)__entry->type,
+                 (u16)__entry->flags, __entry->ret)
+);
+
 /* Keep track of blocks as they are allocated/freed */
 TRACE_EVENT(gfs2_block_alloc,
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index ff89026..4dfdb22 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -21,7 +21,8 @@ struct vm_fault;
 /*
  * Flags for all iomap mappings:
  */
-#define IOMAP_F_NEW    0x01    /* blocks have been newly allocated */
+#define IOMAP_F_NEW            0x01    /* blocks have been newly allocated */
+#define IOMAP_F_BOUNDARY       0x02    /* mapping ends at metadata boundary */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
-- 
2.7.5

Reply via email to