According to xfstest generic/240, applications see, to expect direct I/O
writes to either complete as a whole or to fail; short direct I/O writes
are apparently not appreciated.  This means that when only part of an
asynchronous direct I/O write succeeds, we can either fail the entire
write, or we can wait wait for the partial write to complete and retry
the remaining write using buffered I/O.  The old __blockdev_direct_IO
helper has code for waiting for partial writes to complete; the new
iomap_dio_rw iomap helper does not.

The above mentioned fallback mode is used by gfs2, which doesn't allow
block allocations under direct I/O to avoid taking cluster-wide
exclusive locks.  As a consequence, an asynchronous direct I/O write to
a file range that ends in a hole will result in a short write.  When
that happens, we want to retry the remaining write using buffered I/O.

To allow that, change iomap_dio_rw to wait for short direct I/O writes
like __blockdev_direct_IO does instead of returning -EIOCBQUEUED.

This fixes xfstest generic/240 on gfs2.

Signed-off-by: Andreas Gruenbacher <agrue...@redhat.com>
Cc: Dave Chinner <dchin...@redhat.com>
---
 fs/iomap.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 27d97a290623..befddf91fb38 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -821,9 +821,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
                iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
        if (atomic_dec_and_test(&dio->ref)) {
-               if (is_sync_kiocb(dio->iocb)) {
-                       struct task_struct *waiter = dio->submit.waiter;
-
+               struct task_struct *waiter = dio->submit.waiter;
+               if (waiter) {
                        WRITE_ONCE(dio->submit.waiter, NULL);
                        wake_up_process(waiter);
                } else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -997,6 +996,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        unsigned int flags = IOMAP_DIRECT;
        struct blk_plug plug;
        struct iomap_dio *dio;
+       bool wait_for_completion = is_sync_kiocb(iocb);
 
        lockdep_assert_held(&inode->i_rwsem);
 
@@ -1016,11 +1016,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        dio->flags = 0;
 
        dio->submit.iter = iter;
-       if (is_sync_kiocb(iocb)) {
-               dio->submit.waiter = current;
-               dio->submit.cookie = BLK_QC_T_NONE;
-               dio->submit.last_queue = NULL;
-       }
 
        if (iov_iter_rw(iter) == READ) {
                if (pos >= dio->i_size)
@@ -1057,7 +1052,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                dio_warn_stale_pagecache(iocb->ki_filp);
        ret = 0;
 
-       if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+       if (iov_iter_rw(iter) == WRITE && !wait_for_completion &&
            !inode->i_sb->s_dio_done_wq) {
                ret = sb_init_dio_done_wq(inode->i_sb);
                if (ret < 0)
@@ -1074,6 +1069,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                        /* magic error code to fall back to buffered I/O */
                        if (ret == -ENOTBLK)
                                ret = 0;
+                       if (iov_iter_rw(iter) == WRITE)
+                               wait_for_completion = true;
                        break;
                }
                pos += ret;
@@ -1081,13 +1078,20 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                if (iov_iter_rw(iter) == READ && pos >= dio->i_size)
                        break;
        } while ((count = iov_iter_count(iter)) > 0);
+
+       dio->submit.waiter = NULL;
+       if (wait_for_completion) {
+               dio->submit.waiter = current;
+               dio->submit.cookie = BLK_QC_T_NONE;
+               dio->submit.last_queue = NULL;
+       }
        blk_finish_plug(&plug);
 
        if (ret < 0)
                iomap_dio_set_error(dio, ret);
 
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!is_sync_kiocb(iocb))
+               if (!wait_for_completion)
                        return -EIOCBQUEUED;
 
                for (;;) {
-- 
2.14.3

Reply via email to