On 11/16, Jaegeuk Kim wrote:
> From: Eric Biggers <ebigg...@google.com>
> 
> Make f2fs_file_read_iter() and f2fs_file_write_iter() use the iomap
> direct I/O implementation instead of the fs/direct-io.c one.
> 
> The iomap implementation is more efficient, and it also avoids the need
> to add new features and optimizations to the old implementation.
> 
> This new implementation also eliminates the need for f2fs to hook bio
> submission and completion and to allocate memory per-bio.  This is
> because it's possible to correctly update f2fs's in-flight DIO counters
> using __iomap_dio_rw() in combination with an implementation of
> iomap_dio_ops::end_io() (as suggested by Christoph Hellwig).
> 
> When possible, this new implementation preserves existing f2fs behavior
> such as the conditions for falling back to buffered I/O.
> 
> This patch has been tested with xfstests by running 'gce-xfstests -c
> f2fs -g auto -X generic/017' with and without this patch; no regressions
> were seen.  (Some tests fail both before and after.  generic/017 hangs
> both before and after, so it had to be excluded.)
> 
> Signed-off-by: Eric Biggers <ebigg...@google.com>
> Signed-off-by: Jaegeuk Kim <jaeg...@kernel.org>
> ---
>  fs/f2fs/data.c   | 205 +---------------------------
>  fs/f2fs/f2fs.h   |   8 +-
>  fs/f2fs/file.c   | 343 +++++++++++++++++++++++++++++++++++++++++------
>  fs/f2fs/iostat.c |  15 +--
>  4 files changed, 311 insertions(+), 260 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 43b3ca7cabe0..a9124d94a5d1 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1377,11 +1377,6 @@ static int __allocate_data_block(struct dnode_of_data 
> *dn, int seg_type)
>               f2fs_invalidate_compress_page(sbi, old_blkaddr);
>       }
>       f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
> -
> -     /*
> -      * i_size will be updated by direct_IO. Otherwise, we'll get stale
> -      * data from unwritten block via dio_read.
> -      */
>       return 0;
>  }
>  
> @@ -1743,50 +1738,6 @@ static inline u64 blks_to_bytes(struct inode *inode, 
> u64 blks)
>       return (blks << inode->i_blkbits);
>  }
>  
> -static int __get_data_block(struct inode *inode, sector_t iblock,
> -                     struct buffer_head *bh, int create, int flag,
> -                     pgoff_t *next_pgofs, int seg_type, bool may_write)
> -{
> -     struct f2fs_map_blocks map;
> -     int err;
> -
> -     map.m_lblk = iblock;
> -     map.m_len = bytes_to_blks(inode, bh->b_size);
> -     map.m_next_pgofs = next_pgofs;
> -     map.m_next_extent = NULL;
> -     map.m_seg_type = seg_type;
> -     map.m_may_create = may_write;
> -
> -     err = f2fs_map_blocks(inode, &map, create, flag);
> -     if (!err) {
> -             map_bh(bh, inode->i_sb, map.m_pblk);
> -             bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
> -             bh->b_size = blks_to_bytes(inode, map.m_len);
> -
> -             if (map.m_multidev_dio)
> -                     bh->b_bdev = map.m_bdev;
> -     }
> -     return err;
> -}
> -
> -static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
> -                     struct buffer_head *bh_result, int create)
> -{
> -     return __get_data_block(inode, iblock, bh_result, create,
> -                             F2FS_GET_BLOCK_DIO, NULL,
> -                             f2fs_rw_hint_to_seg_type(inode->i_write_hint),
> -                             true);
> -}
> -
> -static int get_data_block_dio(struct inode *inode, sector_t iblock,
> -                     struct buffer_head *bh_result, int create)
> -{
> -     return __get_data_block(inode, iblock, bh_result, create,
> -                             F2FS_GET_BLOCK_DIO, NULL,
> -                             f2fs_rw_hint_to_seg_type(inode->i_write_hint),
> -                             false);
> -}
> -
>  static int f2fs_xattr_fiemap(struct inode *inode,
>                               struct fiemap_extent_info *fieinfo)
>  {
> @@ -3262,7 +3213,7 @@ static int f2fs_write_data_pages(struct address_space 
> *mapping,
>                       FS_CP_DATA_IO : FS_DATA_IO);
>  }
>  
> -static void f2fs_write_failed(struct inode *inode, loff_t to)
> +void f2fs_write_failed(struct inode *inode, loff_t to)
>  {
>       loff_t i_size = i_size_read(inode);
>  
> @@ -3550,158 +3501,6 @@ static int f2fs_write_end(struct file *file,
>       return copied;
>  }
>  
> -static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
> -                        loff_t offset)
> -{
> -     unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
> -     unsigned blkbits = i_blkbits;
> -     unsigned blocksize_mask = (1 << blkbits) - 1;
> -     unsigned long align = offset | iov_iter_alignment(iter);
> -     struct block_device *bdev = inode->i_sb->s_bdev;
> -
> -     if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
> -             return 1;
> -
> -     if (align & blocksize_mask) {
> -             if (bdev)
> -                     blkbits = blksize_bits(bdev_logical_block_size(bdev));
> -             blocksize_mask = (1 << blkbits) - 1;
> -             if (align & blocksize_mask)
> -                     return -EINVAL;
> -             return 1;
> -     }
> -     return 0;
> -}
> -
> -static void f2fs_dio_end_io(struct bio *bio)
> -{
> -     struct f2fs_private_dio *dio = bio->bi_private;
> -
> -     dec_page_count(F2FS_I_SB(dio->inode),
> -                     dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> -     bio->bi_private = dio->orig_private;
> -     bio->bi_end_io = dio->orig_end_io;
> -
> -     kfree(dio);
> -
> -     bio_endio(bio);
> -}
> -
> -static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
> -                                                     loff_t file_offset)
> -{
> -     struct f2fs_private_dio *dio;
> -     bool write = (bio_op(bio) == REQ_OP_WRITE);
> -
> -     dio = f2fs_kzalloc(F2FS_I_SB(inode),
> -                     sizeof(struct f2fs_private_dio), GFP_NOFS);
> -     if (!dio)
> -             goto out;
> -
> -     dio->inode = inode;
> -     dio->orig_end_io = bio->bi_end_io;
> -     dio->orig_private = bio->bi_private;
> -     dio->write = write;
> -
> -     bio->bi_end_io = f2fs_dio_end_io;
> -     bio->bi_private = dio;
> -
> -     inc_page_count(F2FS_I_SB(inode),
> -                     write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> -     submit_bio(bio);
> -     return;
> -out:
> -     bio->bi_status = BLK_STS_IOERR;
> -     bio_endio(bio);
> -}
> -
> -static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> -{
> -     struct address_space *mapping = iocb->ki_filp->f_mapping;
> -     struct inode *inode = mapping->host;
> -     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> -     struct f2fs_inode_info *fi = F2FS_I(inode);
> -     size_t count = iov_iter_count(iter);
> -     loff_t offset = iocb->ki_pos;
> -     int rw = iov_iter_rw(iter);
> -     int err;
> -     enum rw_hint hint = iocb->ki_hint;
> -     int whint_mode = F2FS_OPTION(sbi).whint_mode;
> -     bool do_opu;
> -
> -     err = check_direct_IO(inode, iter, offset);
> -     if (err)
> -             return err < 0 ? err : 0;
> -
> -     if (f2fs_force_buffered_io(inode, iocb, iter))
> -             return 0;
> -
> -     do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
> -
> -     trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> -
> -     if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
> -             iocb->ki_hint = WRITE_LIFE_NOT_SET;
> -
> -     if (iocb->ki_flags & IOCB_NOWAIT) {
> -             if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
> -                     iocb->ki_hint = hint;
> -                     err = -EAGAIN;
> -                     goto out;
> -             }
> -             if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> -                     up_read(&fi->i_gc_rwsem[rw]);
> -                     iocb->ki_hint = hint;
> -                     err = -EAGAIN;
> -                     goto out;
> -             }
> -     } else {
> -             down_read(&fi->i_gc_rwsem[rw]);
> -             if (do_opu)
> -                     down_read(&fi->i_gc_rwsem[READ]);
> -     }
> -
> -     err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
> -                     iter, rw == WRITE ? get_data_block_dio_write :
> -                     get_data_block_dio, NULL, f2fs_dio_submit_bio,
> -                     rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
> -                     DIO_SKIP_HOLES);
> -
> -     if (do_opu)
> -             up_read(&fi->i_gc_rwsem[READ]);
> -
> -     up_read(&fi->i_gc_rwsem[rw]);
> -
> -     if (rw == WRITE) {
> -             if (whint_mode == WHINT_MODE_OFF)
> -                     iocb->ki_hint = hint;
> -             if (err > 0) {
> -                     f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
> -                                                                     err);
> -                     if (!do_opu)
> -                             set_inode_flag(inode, FI_UPDATE_WRITE);
> -             } else if (err == -EIOCBQUEUED) {
> -                     f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
> -                                             count - iov_iter_count(iter));
> -             } else if (err < 0) {
> -                     f2fs_write_failed(inode, offset + count);
> -             }
> -     } else {
> -             if (err > 0)
> -                     f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
> -             else if (err == -EIOCBQUEUED)
> -                     f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
> -                                             count - iov_iter_count(iter));
> -     }
> -
> -out:
> -     trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
> -
> -     return err;
> -}
> -
>  void f2fs_invalidate_page(struct page *page, unsigned int offset,
>                                                       unsigned int length)
>  {
> @@ -4157,7 +3956,7 @@ const struct address_space_operations f2fs_dblock_aops 
> = {
>       .set_page_dirty = f2fs_set_data_page_dirty,
>       .invalidatepage = f2fs_invalidate_page,
>       .releasepage    = f2fs_release_page,
> -     .direct_IO      = f2fs_direct_IO,
> +     .direct_IO      = noop_direct_IO,
>       .bmap           = f2fs_bmap,
>       .swap_activate  = f2fs_swap_activate,
>       .swap_deactivate = f2fs_swap_deactivate,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0d199e8f2c1d..26e92799ac6c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1806,13 +1806,6 @@ struct f2fs_sb_info {
>  #endif
>  };
>  
> -struct f2fs_private_dio {
> -     struct inode *inode;
> -     void *orig_private;
> -     bio_end_io_t *orig_end_io;
> -     bool write;
> -};
> -
>  #ifdef CONFIG_F2FS_FAULT_INJECTION
>  #define f2fs_show_injection_info(sbi, type)                                  
> \
>       printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
> @@ -3641,6 +3634,7 @@ int f2fs_write_single_data_page(struct page *page, int 
> *submitted,
>                               struct writeback_control *wbc,
>                               enum iostat_type io_type,
>                               int compr_blocks, bool allow_balance);
> +void f2fs_write_failed(struct inode *inode, loff_t to);
>  void f2fs_invalidate_page(struct page *page, unsigned int offset,
>                       unsigned int length);
>  int f2fs_release_page(struct page *page, gfp_t wait);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 709fa893d832..60e5e2f1c1e8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -24,6 +24,7 @@
>  #include <linux/sched/signal.h>
>  #include <linux/fileattr.h>
>  #include <linux/fadvise.h>
> +#include <linux/iomap.h>
>  
>  #include "f2fs.h"
>  #include "node.h"
> @@ -4229,23 +4230,145 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, 
> unsigned long arg)
>       return __f2fs_ioctl(filp, cmd, arg);
>  }
>  
> -static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> +/*
> + * Return %true if the given read or write request should use direct I/O, or
> + * %false if it should use buffered I/O.
> + */
> +static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
> +                             struct iov_iter *iter)
> +{
> +     unsigned int align;
> +
> +     if (!(iocb->ki_flags & IOCB_DIRECT))
> +             return false;
> +
> +     if (f2fs_force_buffered_io(inode, iocb, iter))
> +             return false;
> +
> +     /*
> +      * Direct I/O not aligned to the disk's logical_block_size will be
> +      * attempted, but will fail with -EINVAL.
> +      *
> +      * f2fs additionally requires that direct I/O be aligned to the
> +      * filesystem block size, which is often a stricter requirement.
> +      * However, f2fs traditionally falls back to buffered I/O on requests
> +      * that are logical_block_size-aligned but not fs-block aligned.
> +      *
> +      * The below logic implements this behavior.
> +      */
> +     align = iocb->ki_pos | iov_iter_alignment(iter);
> +     if (!IS_ALIGNED(align, i_blocksize(inode)) &&
> +         IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
> +             return false;
> +
> +     return true;
> +}
> +
> +static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
> +                             unsigned int flags)
> +{
> +     struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> +
> +     dec_page_count(sbi, F2FS_DIO_READ);
> +     if (error)
> +             return error;
> +     f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);

I hit one deadlock issue by using spin_lock in softirq context. I replaced it
with spin_lock_bh.

> +     return 0;
> +}
> +
> +static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
> +     .end_io = f2fs_dio_read_end_io,
> +};
> +
> +static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
>       struct file *file = iocb->ki_filp;
>       struct inode *inode = file_inode(file);
> -     int ret;
> +     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +     struct f2fs_inode_info *fi = F2FS_I(inode);
> +     const loff_t pos = iocb->ki_pos;
> +     const size_t count = iov_iter_count(to);
> +     struct iomap_dio *dio;
> +     ssize_t ret;
> +
> +     if (count == 0)
> +             return 0; /* skip atime update */
> +
> +     trace_f2fs_direct_IO_enter(inode, pos, count, READ);
> +
> +     if (iocb->ki_flags & IOCB_NOWAIT) {
> +             if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
> +                     ret = -EAGAIN;
> +                     goto out;
> +             }
> +     } else {
> +             down_read(&fi->i_gc_rwsem[READ]);
> +     }
> +
> +     /*
> +      * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
> +      * the higher-level function iomap_dio_rw() in order to ensure that the
> +      * F2FS_DIO_READ counter will be decremented correctly in all cases.
> +      */
> +     inc_page_count(sbi, F2FS_DIO_READ);
> +     dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
> +                          &f2fs_iomap_dio_read_ops, 0, 0);
> +     if (IS_ERR_OR_NULL(dio)) {
> +             ret = PTR_ERR_OR_ZERO(dio);
> +             if (ret != -EIOCBQUEUED)
> +                     dec_page_count(sbi, F2FS_DIO_READ);
> +     } else {
> +             ret = iomap_dio_complete(dio);
> +     }
> +
> +     up_read(&fi->i_gc_rwsem[READ]);
> +
> +     file_accessed(file);
> +out:
> +     trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
> +     return ret;
> +}
> +
> +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> +{
> +     struct inode *inode = file_inode(iocb->ki_filp);
> +     ssize_t ret;
>  
>       if (!f2fs_is_compress_backend_ready(inode))
>               return -EOPNOTSUPP;
>  
> -     ret = generic_file_read_iter(iocb, iter);
> +     if (f2fs_should_use_dio(inode, iocb, to))
> +             return f2fs_dio_read_iter(iocb, to);
>  
> +     ret = filemap_read(iocb, to, 0);
>       if (ret > 0)
> -             f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
> -
> +             f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
>       return ret;
>  }
>  
> +static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
> +{
> +     struct file *file = iocb->ki_filp;
> +     struct inode *inode = file_inode(file);
> +     ssize_t count;
> +     int err;
> +
> +     if (IS_IMMUTABLE(inode))
> +             return -EPERM;
> +
> +     if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
> +             return -EPERM;
> +
> +     count = generic_write_checks(iocb, from);
> +     if (count <= 0)
> +             return count;
> +
> +     err = file_modified(file);
> +     if (err)
> +             return err;
> +     return count;
> +}
> +
>  /*
>   * Preallocate blocks for a write request, if it is possible and helpful to 
> do
>   * so.  Returns a positive number if blocks may have been preallocated, 0 if 
> no
> @@ -4253,15 +4376,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb 
> *iocb, struct iov_iter *iter)
>   * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
>   * requested blocks (not just some of them) have been allocated.
>   */
> -static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> +                                bool dio)
>  {
>       struct inode *inode = file_inode(iocb->ki_filp);
>       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>       const loff_t pos = iocb->ki_pos;
>       const size_t count = iov_iter_count(iter);
>       struct f2fs_map_blocks map = {};
> -     bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
> -                !f2fs_force_buffered_io(inode, iocb, iter);
>       int flag;
>       int ret;
>  
> @@ -4317,13 +4439,174 @@ static int f2fs_preallocate_blocks(struct kiocb 
> *iocb, struct iov_iter *iter)
>       return map.m_len;
>  }
>  
> -static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
> +static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
> +                                     struct iov_iter *from)
> +{
> +     struct file *file = iocb->ki_filp;
> +     struct inode *inode = file_inode(file);
> +     ssize_t ret;
> +
> +     if (iocb->ki_flags & IOCB_NOWAIT)
> +             return -EOPNOTSUPP;
> +
> +     current->backing_dev_info = inode_to_bdi(inode);
> +     ret = generic_perform_write(file, from, iocb->ki_pos);
> +     current->backing_dev_info = NULL;
> +
> +     if (ret > 0) {
> +             iocb->ki_pos += ret;
> +             f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
> +     }
> +     return ret;
> +}
> +
> +static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
> +                              unsigned int flags)
> +{
> +     struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> +
> +     dec_page_count(sbi, F2FS_DIO_WRITE);
> +     if (error)
> +             return error;
> +     f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
> +     return 0;
> +}
> +
> +static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
> +     .end_io = f2fs_dio_write_end_io,
> +};
> +
> +static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> +                                bool *may_need_sync)
>  {
>       struct file *file = iocb->ki_filp;
>       struct inode *inode = file_inode(file);
> +     struct f2fs_inode_info *fi = F2FS_I(inode);
> +     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +     const bool do_opu = f2fs_lfs_mode(sbi);
> +     const int whint_mode = F2FS_OPTION(sbi).whint_mode;
> +     const loff_t pos = iocb->ki_pos;
> +     const ssize_t count = iov_iter_count(from);
> +     const enum rw_hint hint = iocb->ki_hint;
> +     unsigned int dio_flags;
> +     struct iomap_dio *dio;
> +     ssize_t ret;
> +
> +     trace_f2fs_direct_IO_enter(inode, pos, count, WRITE);
> +
> +     if (iocb->ki_flags & IOCB_NOWAIT) {
> +             /* f2fs_convert_inline_inode() and block allocation can block */
> +             if (f2fs_has_inline_data(inode) ||
> +                 !f2fs_overwrite_io(inode, pos, count)) {
> +                     ret = -EAGAIN;
> +                     goto out;
> +             }
> +
> +             if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
> +                     ret = -EAGAIN;
> +                     goto out;
> +             }
> +             if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> +                     up_read(&fi->i_gc_rwsem[WRITE]);
> +                     ret = -EAGAIN;
> +                     goto out;
> +             }
> +     } else {
> +             ret = f2fs_convert_inline_inode(inode);
> +             if (ret)
> +                     goto out;
> +
> +             down_read(&fi->i_gc_rwsem[WRITE]);
> +             if (do_opu)
> +                     down_read(&fi->i_gc_rwsem[READ]);
> +     }
> +     if (whint_mode == WHINT_MODE_OFF)
> +             iocb->ki_hint = WRITE_LIFE_NOT_SET;
> +
> +     /*
> +      * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
> +      * the higher-level function iomap_dio_rw() in order to ensure that the
> +      * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
> +      */
> +     inc_page_count(sbi, F2FS_DIO_WRITE);
> +     dio_flags = 0;
> +     if (pos + count > inode->i_size)
> +             dio_flags |= IOMAP_DIO_FORCE_WAIT;
> +     dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
> +                          &f2fs_iomap_dio_write_ops, dio_flags, 0);
> +     if (IS_ERR_OR_NULL(dio)) {
> +             ret = PTR_ERR_OR_ZERO(dio);
> +             if (ret == -ENOTBLK)
> +                     ret = 0;
> +             if (ret != -EIOCBQUEUED)
> +                     dec_page_count(sbi, F2FS_DIO_WRITE);
> +     } else {
> +             ret = iomap_dio_complete(dio);
> +     }
> +
> +     if (whint_mode == WHINT_MODE_OFF)
> +             iocb->ki_hint = hint;
> +     if (do_opu)
> +             up_read(&fi->i_gc_rwsem[READ]);
> +     up_read(&fi->i_gc_rwsem[WRITE]);
> +
> +     if (ret < 0)
> +             goto out;
> +     if (pos + ret > inode->i_size)
> +             f2fs_i_size_write(inode, pos + ret);
> +     if (!do_opu)
> +             set_inode_flag(inode, FI_UPDATE_WRITE);
> +
> +     if (iov_iter_count(from)) {
> +             ssize_t ret2;
> +             loff_t bufio_start_pos = iocb->ki_pos;
> +
> +             /*
> +              * The direct write was partial, so we need to fall back to a
> +              * buffered write for the remainder.
> +              */
> +
> +             ret2 = f2fs_buffered_write_iter(iocb, from);
> +             if (iov_iter_count(from))
> +                     f2fs_write_failed(inode, iocb->ki_pos);
> +             if (ret2 < 0)
> +                     goto out;
> +
> +             /*
> +              * Ensure that the pagecache pages are written to disk and
> +              * invalidated to preserve the expected O_DIRECT semantics.
> +              */
> +             if (ret2 > 0) {
> +                     loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
> +
> +                     ret += ret2;
> +
> +                     ret2 = filemap_write_and_wait_range(file->f_mapping,
> +                                                         bufio_start_pos,
> +                                                         bufio_end_pos);
> +                     if (ret2 < 0)
> +                             goto out;
> +                     invalidate_mapping_pages(file->f_mapping,
> +                                              bufio_start_pos >> PAGE_SHIFT,
> +                                              bufio_end_pos >> PAGE_SHIFT);
> +             }
> +     } else {
> +             /* iomap_dio_rw() already handled the generic_write_sync(). */
> +             *may_need_sync = false;
> +     }
> +out:
> +     trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
> +     return ret;
> +}
> +
> +static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
> +{
> +     struct inode *inode = file_inode(iocb->ki_filp);
>       const loff_t orig_pos = iocb->ki_pos;
>       const size_t orig_count = iov_iter_count(from);
>       loff_t target_size;
> +     bool dio;
> +     bool may_need_sync = true;
>       int preallocated;
>       ssize_t ret;
>  
> @@ -4346,44 +4629,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb 
> *iocb, struct iov_iter *from)
>               inode_lock(inode);
>       }
>  
> -     if (unlikely(IS_IMMUTABLE(inode))) {
> -             ret = -EPERM;
> -             goto out_unlock;
> -     }
> -
> -     if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
> -             ret = -EPERM;
> -             goto out_unlock;
> -     }
> -
> -     ret = generic_write_checks(iocb, from);
> +     ret = f2fs_write_checks(iocb, from);
>       if (ret <= 0)
>               goto out_unlock;
>  
> -     if (iocb->ki_flags & IOCB_NOWAIT) {
> -             if (!f2fs_overwrite_io(inode, iocb->ki_pos,
> -                                     iov_iter_count(from)) ||
> -                             f2fs_has_inline_data(inode) ||
> -                             f2fs_force_buffered_io(inode, iocb, from)) {
> -                     ret = -EAGAIN;
> -                     goto out_unlock;
> -             }
> -     }
> +     /* Determine whether we will do a direct write or a buffered write. */
> +     dio = f2fs_should_use_dio(inode, iocb, from);
>  
> -     if (iocb->ki_flags & IOCB_DIRECT) {
> -             ret = f2fs_convert_inline_inode(inode);
> -             if (ret)
> -                     goto out_unlock;
> -     }
>       /* Possibly preallocate the blocks for the write. */
>       target_size = iocb->ki_pos + iov_iter_count(from);
> -     preallocated = f2fs_preallocate_blocks(iocb, from);
> +     preallocated = f2fs_preallocate_blocks(iocb, from, dio);
>       if (preallocated < 0) {
>               ret = preallocated;
>               goto out_unlock;
>       }
>  
> -     ret = __generic_file_write_iter(iocb, from);
> +     /* Do the actual write. */
> +     if (dio)
> +             ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
> +     else
> +             ret = f2fs_buffered_write_iter(iocb, from);
>  
>       /* Don't leave any preallocated blocks around past i_size. */
>       if (preallocated > 0 && i_size_read(inode) < target_size) {
> @@ -4398,15 +4663,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb 
> *iocb, struct iov_iter *from)
>       }
>  
>       clear_inode_flag(inode, FI_PREALLOCATED_ALL);
> -
> -     if (ret > 0)
> -             f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
> -
>  out_unlock:
>       inode_unlock(inode);
>  out:
>       trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
> -     if (ret > 0)
> +     if (ret > 0 && may_need_sync)
>               ret = generic_write_sync(iocb, ret);
>       return ret;
>  }
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index cdcf54ae0db8..b911ea73c21a 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -166,15 +166,12 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
>       spin_lock(&sbi->iostat_lock);
>       sbi->rw_iostat[type] += io_bytes;
>  
> -     if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
> -             sbi->rw_iostat[APP_BUFFERED_IO] =
> -                     sbi->rw_iostat[APP_WRITE_IO] -
> -                     sbi->rw_iostat[APP_DIRECT_IO];
> -
> -     if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
> -             sbi->rw_iostat[APP_BUFFERED_READ_IO] =
> -                     sbi->rw_iostat[APP_READ_IO] -
> -                     sbi->rw_iostat[APP_DIRECT_READ_IO];
> +     if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
> +             sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
> +
> +     if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
> +             sbi->rw_iostat[APP_READ_IO] += io_bytes;
> +
>       spin_unlock(&sbi->iostat_lock);
>  
>       f2fs_record_iostat(sbi);
> -- 
> 2.34.0.rc1.387.gb447b232ab-goog


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to