On Fri, Sep 27, 2024 at 12:25 AM Daejun Park <daejun7.p...@samsung.com> wrote:
>
> With zoned storage, F2FS avoids direct IO writes and uses buffered writes
> with page cache flushes to prevent unaligned writes. However, the
> unaligned write can be avoided by allowing only a single thread per zone
> to perform direct writes.
>
> To achieve direct writes in zoned storage, it uses semephores to serialize
> block allocation and writes per zone.
>
> Signed-off-by: Daejun Park <daejun7.p...@samsung.com>
> ---
>  fs/f2fs/data.c    | 28 ++++++++++++++++++++++++++-
>  fs/f2fs/f2fs.h    |  7 +++++--
>  fs/f2fs/file.c    | 48 ++++++++++++++++++++++++++++++++++++++++-------
>  fs/f2fs/gc.c      |  4 ++--
>  fs/f2fs/segment.c |  6 +++---
>  fs/f2fs/super.c   |  5 ++++-
>  6 files changed, 82 insertions(+), 16 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index b94cf6eea2f9..fa2bd88a2ed2 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -922,7 +922,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>  }
>
>  #ifdef CONFIG_BLK_DEV_ZONED
> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>  {
>         struct block_device *bdev = sbi->sb->s_bdev;
>         int devi = 0;
> @@ -4207,6 +4207,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t 
> offset, loff_t length,
>                             struct iomap *srcmap)
>  {
>         struct f2fs_map_blocks map = {};
> +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>         pgoff_t next_pgofs = 0;
>         int err;
>
> @@ -4218,6 +4219,18 @@ static int f2fs_iomap_begin(struct inode *inode, 
> loff_t offset, loff_t length,
>         if (flags & IOMAP_WRITE)
>                 map.m_may_create = true;
>
> +       if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) {

I think it's better that we can skip this for conventional LU by
examining the block address.

> +               struct f2fs_rwsem *io_order_lock =
> +                               &sbi->io_order_lock[map.m_seg_type];
> +
> +               f2fs_down_write(io_order_lock);
> +
> +               /* set io order lock */
> +               iomap->private = io_order_lock;
> +       } else {
> +               iomap->private = NULL;
> +       }
> +
>         err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
>         if (err)
>                 return err;
> @@ -4273,6 +4286,19 @@ static int f2fs_iomap_begin(struct inode *inode, 
> loff_t offset, loff_t length,
>         return 0;
>  }
>
> +static int f2fs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
> +               ssize_t written, unsigned int flags, struct iomap *iomap)
> +{
> +       struct f2fs_rwsem *io_order_lock = iomap->private;
> +
> +       /* ordered write */
> +       if (io_order_lock)
> +               f2fs_up_write(io_order_lock);
> +
> +       return 0;
> +}
> +
>  const struct iomap_ops f2fs_iomap_ops = {
>         .iomap_begin    = f2fs_iomap_begin,
> +       .iomap_end      = f2fs_iomap_end,
>  };
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 33f5449dc22d..06ed132f22ad 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1582,8 +1582,8 @@ struct f2fs_sb_info {
>
>         /* for bio operations */
>         struct f2fs_bio_info *write_io[NR_PAGE_TYPE];   /* for write bios */
> -       /* keep migration IO order for LFS mode */
> -       struct f2fs_rwsem io_order_lock;
> +       /* keep IO order for LFS mode */
> +       struct f2fs_rwsem io_order_lock[NR_CURSEG_DATA_TYPE];
>         pgoff_t page_eio_ofs[NR_PAGE_TYPE];     /* EIO page offset */
>         int page_eio_cnt[NR_PAGE_TYPE];         /* EIO count */
>
> @@ -3863,6 +3863,9 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info 
> *sbi,
>  void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
>  int f2fs_submit_page_bio(struct f2fs_io_info *fio);
>  int f2fs_merge_page_bio(struct f2fs_io_info *fio);
> +#ifdef CONFIG_BLK_DEV_ZONED
> +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr);
> +#endif
>  void f2fs_submit_page_write(struct f2fs_io_info *fio);
>  struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
>                 block_t blk_addr, sector_t *sector);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 99903eafa7fe..fde49f3e54cf 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -869,13 +869,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, 
> int rw)
>         /* disallow direct IO if any of devices has unaligned blksize */
>         if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
>                 return true;
> -       /*
> -        * for blkzoned device, fallback direct IO to buffered IO, so
> -        * all IOs can be serialized by log-structured write.
> -        */
> -       if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
> -           !f2fs_is_pinned_file(inode))
> -               return true;
> +
>         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
>                 return true;
>
> @@ -4815,6 +4809,17 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, 
> ssize_t size, int error,
>         return 0;
>  }
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +static void f2fs_dio_zone_write_end_io(struct bio *bio)
> +{
> +       struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> +
> +       bio->bi_private = io->bi_private;
> +       complete(&io->zone_wait);
> +       iomap_dio_bio_end_io(bio);
> +}
> +#endif
> +
>  static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
>                                         struct bio *bio, loff_t file_offset)
>  {
> @@ -4824,6 +4829,31 @@ static void f2fs_dio_write_submit_io(const struct 
> iomap_iter *iter,
>         enum temp_type temp = f2fs_get_segment_temp(seg_type);
>
>         bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) {
> +               struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
> +               block_t last_blkaddr = SECTOR_TO_BLOCK(bio_end_sector(bio) - 
> 1);
> +
> +               f2fs_down_write(&io->io_rwsem);
> +               if (io->zone_pending_bio) {
> +                       wait_for_completion_io(&io->zone_wait);
> +                       bio_put(io->zone_pending_bio);
> +                       io->zone_pending_bio = NULL;
> +                       io->bi_private = NULL;
> +               }
> +
> +               if (is_end_zone_blkaddr(sbi, last_blkaddr)) {
> +                       bio_get(bio);
> +                       reinit_completion(&io->zone_wait);
> +                       io->bi_private = bio->bi_private;
> +                       bio->bi_private = io;
> +                       bio->bi_end_io = f2fs_dio_zone_write_end_io;
> +                       io->zone_pending_bio = bio;
> +               }
> +               f2fs_up_write(&io->io_rwsem);
> +       }
> +#endif
>         submit_bio(bio);
>  }
>
> @@ -4897,6 +4927,10 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, 
> struct iov_iter *from,
>         dio_flags = 0;
>         if (pos + count > inode->i_size)
>                 dio_flags |= IOMAP_DIO_FORCE_WAIT;
> +
> +       if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode))
> +               dio_flags |= IOMAP_DIO_FORCE_WAIT;
> +
>         dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
>                              &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
>         if (IS_ERR_OR_NULL(dio)) {
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 9322a7200e31..49270713f739 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1361,7 +1361,7 @@ static int move_data_block(struct inode *inode, block_t 
> bidx,
>         fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
>
>         if (lfs_mode)
> -               f2fs_down_write(&fio.sbi->io_order_lock);
> +               f2fs_down_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]);
>
>         mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
>                                         fio.old_blkaddr, false);
> @@ -1444,7 +1444,7 @@ static int move_data_block(struct inode *inode, block_t 
> bidx,
>                                                         true, true, true);
>  up_out:
>         if (lfs_mode)
> -               f2fs_up_write(&fio.sbi->io_order_lock);
> +               f2fs_up_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]);
>  put_out:
>         f2fs_put_dnode(&dn);
>  out:
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 1766254279d2..d602ae4d79e3 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3796,10 +3796,10 @@ void f2fs_update_device_state(struct f2fs_sb_info 
> *sbi, nid_t ino,
>  static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
>  {
>         int type = __get_segment_type(fio);
> -       bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == 
> CURSEG_COLD_DATA);
> +       bool keep_order = (f2fs_lfs_mode(fio->sbi) && type <= 
> CURSEG_COLD_DATA);

ditto

>
>         if (keep_order)
> -               f2fs_down_read(&fio->sbi->io_order_lock);
> +               f2fs_down_read(&fio->sbi->io_order_lock[type]);
>
>         if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
>                         &fio->new_blkaddr, sum, type, fio)) {
> @@ -3819,7 +3819,7 @@ static void do_write_page(struct f2fs_summary *sum, 
> struct f2fs_io_info *fio)
>         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
>  out:
>         if (keep_order)
> -               f2fs_up_read(&fio->sbi->io_order_lock);
> +               f2fs_up_read(&fio->sbi->io_order_lock[type]);
>  }
>
>  void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fc2c586c7619..5289b6f5f6f3 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3833,7 +3833,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
>
>         INIT_LIST_HEAD(&sbi->s_list);
>         mutex_init(&sbi->umount_mutex);
> -       init_f2fs_rwsem(&sbi->io_order_lock);
> +
> +       for (i = 0; i < NR_CURSEG_DATA_TYPE; i++)
> +               init_f2fs_rwsem(&sbi->io_order_lock[i]);
> +
>         spin_lock_init(&sbi->cp_lock);
>
>         sbi->dirty_device = 0;
> --
> 2.25.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to