With that, the direct_IO address space operation can be all but
eliminated: only a dummy remains, which indicates that the filesystem
supports direct I/O.
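
For reference, the dummy that remains (from the fs/gfs2/aops.c hunk
below) boils down to:

    static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
    {
            /*
             * We just need the method present so that open/fcntl allow
             * direct I/O.
             */
            return -EINVAL;
    }

The -EINVAL return value should never be seen by applications: open(2)
and fcntl(2) only check that the method is non-NULL before allowing
O_DIRECT, and the new ->read_iter/->write_iter paths in fs/gfs2/file.c
handle IOCB_DIRECT themselves, so the generic code never calls
->direct_IO.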

Signed-off-by: Andreas Gruenbacher <agrue...@redhat.com>
---
 fs/gfs2/aops.c |  92 +------------------------------
 fs/gfs2/bmap.c |  14 ++++-
 fs/gfs2/file.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 174 insertions(+), 103 deletions(-)
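
As a note for testers: the new code paths below are exercised by any
O_DIRECT reader or writer. Here is a minimal userspace sketch, assuming
a 4096-byte logical block size so that the buffer, offset, and length
all satisfy the O_DIRECT alignment requirements:

    #define _GNU_SOURCE       /* for O_DIRECT */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            void *buf;
            ssize_t n;
            int fd;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <file-on-gfs2>\n", argv[0]);
                    return 1;
            }
            /* open(O_DIRECT) succeeds because ->direct_IO is non-NULL */
            fd = open(argv[1], O_RDONLY | O_DIRECT);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (posix_memalign(&buf, 4096, 4096)) {
                    fprintf(stderr, "posix_memalign failed\n");
                    return 1;
            }
            /* goes through gfs2_file_read_iter() -> gfs2_file_direct_read() */
            n = read(fd, buf, 4096);
            if (n < 0)
                    perror("read");
            else
                    printf("read %zd bytes\n", n);
            free(buf);
            close(fd);
            return 0;
    }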

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 3d9633175aa8..d676ee63ab2b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
        return 0;
 }
 
-static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
-                                struct buffer_head *bh_result, int create)
-{
-       return gfs2_block_map(inode, lblock, bh_result, 0);
-}
-
 /**
  * gfs2_writepage_common - Common bits of writepage
  * @page: The page to be written
@@ -1021,94 +1015,12 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset,
                try_to_release_page(page, 0);
 }
 
-/**
- * gfs2_ok_for_dio - check that dio is valid on this file
- * @ip: The inode
- * @offset: The offset at which we are reading or writing
- *
- * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
- *          1 (to accept the i/o request)
- */
-static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
-{
-       /*
-        * Should we return an error here? I can't see that O_DIRECT for
-        * a stuffed file makes any sense. For now we'll silently fall
-        * back to buffered I/O
-        */
-       if (gfs2_is_stuffed(ip))
-               return 0;
-
-       if (offset >= i_size_read(&ip->i_inode))
-               return 0;
-       return 1;
-}
-
-
-
 static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
-       struct address_space *mapping = inode->i_mapping;
-       struct gfs2_inode *ip = GFS2_I(inode);
-       loff_t offset = iocb->ki_pos;
-       struct gfs2_holder gh;
-       int rv;
-
        /*
-        * Deferred lock, even if its a write, since we do no allocation
-        * on this path. All we need change is atime, and this lock mode
-        * ensures that other nodes have flushed their buffered read caches
-        * (i.e. their page cache entries for this inode). We do not,
-        * unfortunately have the option of only flushing a range like
-        * the VFS does.
+        * We just need the method present so that open/fcntl allow direct I/O.
         */
-       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
-       rv = gfs2_glock_nq(&gh);
-       if (rv)
-               goto out_uninit;
-       rv = gfs2_ok_for_dio(ip, offset);
-       if (rv != 1)
-               goto out; /* dio not valid, fall back to buffered i/o */
-
-       /*
-        * Now since we are holding a deferred (CW) lock at this point, you
-        * might be wondering why this is ever needed. There is a case however
-        * where we've granted a deferred local lock against a cached exclusive
-        * glock. That is ok provided all granted local locks are deferred, but
-        * it also means that it is possible to encounter pages which are
-        * cached and possibly also mapped. So here we check for that and sort
-        * them out ahead of the dio. The glock state machine will take care of
-        * everything else.
-        *
-        * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
-        * the first place, mapping->nr_pages will always be zero.
-        */
-       if (mapping->nrpages) {
-               loff_t lstart = offset & ~(PAGE_SIZE - 1);
-               loff_t len = iov_iter_count(iter);
-               loff_t end = PAGE_ALIGN(offset + len) - 1;
-
-               rv = 0;
-               if (len == 0)
-                       goto out;
-               if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
-               unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
-               rv = filemap_write_and_wait_range(mapping, lstart, end);
-               if (rv)
-                       goto out;
-               if (iov_iter_rw(iter) == WRITE)
-                       truncate_inode_pages_range(mapping, lstart, end);
-       }
-
-       rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
-                                 gfs2_get_block_direct, NULL, NULL, 0);
-out:
-       gfs2_glock_dq(&gh);
-out_uninit:
-       gfs2_holder_uninit(&gh);
-       return rv;
+       return -EINVAL;
 }
 
 /**
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index b6ee5252c014..fcafad122392 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -805,6 +805,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
                        iomap->length = hole_size(inode, lblock, mp);
                else
                        iomap->length = size - pos;
+       } else if (!(flags & IOMAP_WRITE)) {
+               if (height == ip->i_height)
+                       iomap->length = hole_size(inode, lblock, mp);
        }
        /* FIXME: Should we limit iomap->length to the maximum allocation size
         * here according to how gfs2_iomap_alloc allocates blocks? */
@@ -949,7 +952,14 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 
        trace_gfs2_iomap_start(ip, pos, length, flags);
        if (flags & IOMAP_WRITE) {
-               ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
+               if (flags & IOMAP_DIRECT) {
+                       ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+                       release_metapath(&mp);
+                       if (iomap->type != IOMAP_MAPPED)
+                               ret = -ENOTBLK;
+               } else {
+                       ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
+               }
        } else {
                ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
                release_metapath(&mp);
@@ -1020,7 +1030,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_trans *tr = current->journal_info;
 
-       if (!(flags & IOMAP_WRITE))
+       if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
                return 0;
 
        gfs2_ordered_add_inode(ip);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d1b54e781577..a6c54a4f46fd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -690,6 +690,122 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
        return ret ? ret : ret1;
 }
 
+static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct file *file = iocb->ki_filp;
+       struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+       size_t count = iov_iter_count(to);
+       struct gfs2_holder gh;
+       ssize_t ret;
+
+       if (!count)
+               return 0; /* skip atime */
+
+       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+       ret = gfs2_glock_nq(&gh);
+       if (ret)
+               goto out_uninit;
+
+       /* fall back to buffered I/O for stuffed files */
+       ret = -ENOTBLK;
+       if (gfs2_is_stuffed(ip))
+               goto out;
+
+       ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
+
+out:
+       gfs2_glock_dq(&gh);
+out_uninit:
+       gfs2_holder_uninit(&gh);
+       return ret;
+}
+
+static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       struct address_space *mapping = inode->i_mapping;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       size_t len = iov_iter_count(from);
+       loff_t offset = iocb->ki_pos;
+       struct gfs2_holder gh;
+       ssize_t ret;
+
+       /*
+        * Deferred lock, even if it's a write, since we do no allocation on
+        * this path. All we need to change is the atime, and this lock mode
+        * ensures that other nodes have flushed their buffered read caches
+        * (i.e. their page cache entries for this inode). We do not,
+        * unfortunately, have the option of only flushing a range like the
+        * VFS does.
+        */
+       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+       ret = gfs2_glock_nq(&gh);
+       if (ret)
+               goto out_uninit;
+
+       /* Silently fall back to buffered I/O for stuffed files */
+       if (gfs2_is_stuffed(ip))
+               goto out;
+
+       /* Silently fall back to buffered I/O when writing beyond EOF */
+       if (offset + len > i_size_read(&ip->i_inode))
+               goto out;
+
+       /*
+        * Now since we are holding a deferred (CW) lock at this point, you
+        * might be wondering why this is ever needed. There is a case however
+        * where we've granted a deferred local lock against a cached exclusive
+        * glock. That is ok provided all granted local locks are deferred, but
+        * it also means that it is possible to encounter pages which are
+        * cached and possibly also mapped. So here we check for that and sort
+        * them out ahead of the dio. The glock state machine will take care of
+        * everything else.
+        *
+        * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+        * the first place, mapping->nrpages will always be zero.
+        */
+       if (mapping->nrpages) {
+               loff_t lstart = offset & ~(PAGE_SIZE - 1);
+               loff_t len = iov_iter_count(from);
+               loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+               ret = 0;
+               if (len == 0)
+                       goto out;
+               if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+               unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+               ret = filemap_write_and_wait_range(mapping, lstart, end);
+               if (ret)
+                       goto out;
+               if (iov_iter_rw(from) == WRITE)
+                       truncate_inode_pages_range(mapping, lstart, end);
+       }
+
+       ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
+
+out:
+       gfs2_glock_dq(&gh);
+out_uninit:
+       gfs2_holder_uninit(&gh);
+       return ret;
+}
+
+static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       ssize_t ret;
+
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               ret = gfs2_file_direct_read(iocb, to);
+               if (likely(ret != -ENOTBLK))
+                       goto out;
+               iocb->ki_flags &= ~IOCB_DIRECT;
+       }
+       ret = generic_file_read_iter(iocb, to);
+out:
+       return ret;
+}
+
 /**
  * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
@@ -707,7 +823,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
-       ssize_t ret;
+       ssize_t written = 0, ret;
 
        ret = gfs2_rsqa_alloc(ip);
        if (ret)
@@ -724,9 +840,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                gfs2_glock_dq_uninit(&gh);
        }
 
-       if (iocb->ki_flags & IOCB_DIRECT)
-               return generic_file_write_iter(iocb, from);
-
        inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret <= 0)
@@ -743,19 +856,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
 
-       ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               struct address_space *mapping = file->f_mapping;
+               loff_t pos, endbyte;
+               ssize_t buffered;
 
-       current->backing_dev_info = NULL;
+               written = gfs2_file_direct_write(iocb, from);
+               if (written < 0 || !iov_iter_count(from))
+                       goto out2;
+
+               ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+               if (unlikely(ret < 0))
+                       goto out2;
+               buffered = ret;
 
+               /*
+                * We need to ensure that the page cache pages are written to
+                * disk and invalidated to preserve the expected O_DIRECT
+                * semantics.
+                */
+               pos = iocb->ki_pos;
+               endbyte = pos + buffered - 1;
+               ret = filemap_write_and_wait_range(mapping, pos, endbyte);
+               if (!ret) {
+                       iocb->ki_pos += buffered;
+                       written += buffered;
+                       invalidate_mapping_pages(mapping,
+                                                pos >> PAGE_SHIFT,
+                                                endbyte >> PAGE_SHIFT);
+               } else {
+                       /*
+                        * We don't know how much we wrote, so just return
+                        * the number of bytes which were direct-written
+                        */
+               }
+       } else {
+               ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+               if (likely(ret > 0))
+                       iocb->ki_pos += ret;
+       }
+
+out2:
+       current->backing_dev_info = NULL;
 out:
        inode_unlock(inode);
        if (likely(ret > 0)) {
-               iocb->ki_pos += ret;
-
                /* Handle various SYNC-type writes */
                ret = generic_write_sync(iocb, ret);
        }
-       return ret;
+       return written ? written : ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1158,7 +1307,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 const struct file_operations gfs2_file_fops = {
        .llseek         = gfs2_llseek,
-       .read_iter      = generic_file_read_iter,
+       .read_iter      = gfs2_file_read_iter,
        .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
@@ -1188,7 +1337,7 @@ const struct file_operations gfs2_dir_fops = {
 
 const struct file_operations gfs2_file_fops_nolock = {
        .llseek         = gfs2_llseek,
-       .read_iter      = generic_file_read_iter,
+       .read_iter      = gfs2_file_read_iter,
        .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
-- 
2.14.3
